library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(gtools)
library(gmodels)
## Warning: package 'gmodels' was built under R version 4.2.3
library(ggplot2)
library(class)
library(tidyr)

Loading the Data and Building the Data Frame

dt2 <- read_excel("D:/SEM 6/BISMILLAH LOMBA/SATDAT JUARA PART 2/Data Cadangan(2).xlsx")
head(dt2)
## # A tibble: 6 × 14
##     age   sex    cp trestbps  chol   fbs restecg thalach exang oldpeak slope
##   <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>   <dbl>   <dbl> <dbl>   <dbl> <dbl>
## 1    69     1     0      160   234     1       2     131     0     0.1     1
## 2    69     0     0      140   239     0       0     151     0     1.8     0
## 3    66     0     0      150   226     0       0     114     0     2.6     2
## 4    65     1     0      138   282     1       2     174     0     1.4     1
## 5    64     1     0      110   211     0       2     144     1     1.8     1
## 6    64     1     0      170   227     0       2     155     0     0.6     1
## # … with 3 more variables: ca <dbl>, thal <dbl>, condition <dbl>
X1 = dt2$age
X2 = as.factor(dt2$sex)
X3 = as.factor(dt2$cp)
X4 = as.factor(dt2$trestbps)
X5 = as.factor(dt2$chol)
X6 = as.factor(dt2$fbs)
X7 = as.factor(dt2$restecg)
X8 = as.factor(dt2$thalach)
X9 = as.factor(dt2$exang)
X10 = as.factor(dt2$oldpeak)
X11 = as.factor(dt2$slope)
X12 = as.factor(dt2$ca)
X13 = as.factor(dt2$thal)
Y = as.factor(dt2$condition)
df = data.frame(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,Y)
str(df)
## 'data.frame':    297 obs. of  14 variables:
##  $ X1 : num  69 69 66 65 64 64 63 61 60 59 ...
##  $ X2 : Factor w/ 2 levels "0","1": 2 1 1 2 2 2 2 2 1 2 ...
##  $ X3 : Factor w/ 4 levels "0","1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ X4 : Factor w/ 50 levels "94","100","101",..: 41 29 35 28 9 44 32 25 35 47 ...
##  $ X5 : Factor w/ 152 levels "126","131","141",..: 66 70 58 109 43 59 65 66 71 100 ...
##  $ X6 : Factor w/ 2 levels "0","1": 2 1 1 2 1 1 2 1 1 1 ...
##  $ X7 : Factor w/ 3 levels "0","1","2": 3 1 1 3 3 3 3 1 1 3 ...
##  $ X8 : Factor w/ 91 levels "71","88","90",..: 32 51 16 74 44 55 50 45 71 45 ...
##  $ X9 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
##  $ X10: Factor w/ 40 levels "0","0.1","0.2",..: 2 18 26 15 18 7 23 26 10 37 ...
##  $ X11: Factor w/ 3 levels "0","1","2": 2 1 3 2 2 2 3 2 1 3 ...
##  $ X12: Factor w/ 4 levels "0","1","2","3": 2 3 1 2 1 1 1 3 1 1 ...
##  $ X13: Factor w/ 3 levels "0","1","2": 1 1 1 1 1 3 2 1 1 3 ...
##  $ Y  : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 2 1 1 ...
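
Note that trestbps, chol, thalach, and oldpeak are continuous measurements; coercing them to factors gives X4, X5, X8, and X10 between 40 and 152 levels each, which is what later makes the full model (model 3) saturate. A minimal sketch of an alternative encoding that keeps them numeric (df_num is our own name and is not used elsewhere in this analysis):

# Sketch: recover the numeric scale of the continuous predictors.
df_num <- df %>%
  mutate(across(c(X4, X5, X8, X10), ~ as.numeric(as.character(.x))))
str(df_num[, c("X4", "X5", "X8", "X10")])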

Checking for Missing Values

colSums(is.na(df))
##  X1  X2  X3  X4  X5  X6  X7  X8  X9 X10 X11 X12 X13   Y 
##   0   0   0   0   0   0   0   0   0   0   0   0   0   0

Data Proportion

table(df$Y)
## 
##   0   1 
## 160 137
prop.table(table(df$Y))
## 
##         0         1 
## 0.5387205 0.4612795

Data Exploration

ggplot(df, mapping = aes(x = X1)) +
  geom_histogram(fill = "blue", col = "black", bins = 20)

ggplot(df, mapping = aes(x = X1, fill = Y)) +
  geom_density(alpha = 0.1)

ggplot(df,mapping = aes(Y,X1))+geom_boxplot(fill="blue",col="black")

ggplot(df, mapping = aes(X2, fill = Y)) +
  geom_bar(col = "black") +
  geom_text(stat = "count", aes(label = after_stat(count)),
            position = position_stack(0.5), color = "white", size = 4) 

# Cross Validation

set.seed(305)

# Set splitting ratio
intrain <- sample(nrow(df), nrow(df)*0.8)

# make train and test data
df_train <- df[intrain,]
df_test <- df[-intrain,]

# check proportion
prop.table(table(df_train$Y))
## 
##         0         1 
## 0.5527426 0.4472574
prop.table(table(df_test$Y))
## 
##         0         1 
## 0.4833333 0.5166667
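
The simple random split above shifts the class balance a little (55/45 in train versus 48/52 in test, against 54/46 overall). A stratified split would keep both sets close to the overall proportion; a minimal sketch using caret::createDataPartition (caret is attached later in this document; idx, df_train_s, and df_test_s are our own names):

# Sketch: stratified 80/20 split preserving the class proportions of Y.
idx <- caret::createDataPartition(df$Y, p = 0.8, list = FALSE)
df_train_s <- df[idx, ]
df_test_s <- df[-idx, ]
prop.table(table(df_train_s$Y)); prop.table(table(df_test_s$Y))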

Model Creation

Model 1

For the first model we use all predictor variables, both the integer-valued and the factor-valued ones, and then apply a stepwise method to find the best combination of the available predictors.

# stepwise: automatic model selection method
options(warn=-1)
log_model_all <- glm(Y ~ ., family="binomial", data= df_train)
log_model_nothing <- glm(Y ~ 1, family="binomial", data= df_train)

log_model1 <- step(log_model_nothing, 
                 list(lower=formula(log_model_nothing),
                      upper=formula(log_model_all)),
                 direction="both", trace = F, test= "F")
formula(log_model1)
## Y ~ X13 + X12 + X3 + X11 + X2 + X9
log_model1 <- glm(Y ~ X13 + X12 + X3 + X11 + X2 + X9,
                  family = "binomial", data = df_train)
summary(log_model1)
## 
## Call:
## glm(formula = Y ~ X13 + X12 + X3 + X11 + X2 + X9, family = "binomial", 
##     data = df_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9298  -0.5220  -0.1965   0.3894   2.7542  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -4.9024     0.9794  -5.006 5.57e-07 ***
## X131          0.2286     0.8439   0.271 0.786454    
## X132          1.7365     0.4585   3.788 0.000152 ***
## X121          2.0084     0.5167   3.887 0.000102 ***
## X122          3.2813     0.8148   4.027 5.65e-05 ***
## X123          3.1016     1.0024   3.094 0.001975 ** 
## X31           1.0512     0.8462   1.242 0.214164    
## X32           0.1090     0.7864   0.139 0.889781    
## X33           1.9250     0.7465   2.579 0.009915 ** 
## X111          1.5227     0.4813   3.164 0.001557 ** 
## X112          1.5153     0.7935   1.910 0.056196 .  
## X21           1.1324     0.5206   2.175 0.029609 *  
## X91           0.8554     0.4694   1.822 0.068421 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 325.91  on 236  degrees of freedom
## Residual deviance: 162.65  on 224  degrees of freedom
## AIC: 188.65
## 
## Number of Fisher Scoring iterations: 6

Next, we drop the variables whose p-values are large (X13 and X3).
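
Equivalently, the drop can be justified with a likelihood-ratio test comparing the models with and without those terms (a minimal sketch, run before the update below while log_model1 still contains X13 and X3):

# Sketch: likelihood-ratio (chi-squared) test for dropping X13 and X3.
anova(update(log_model1, . ~ . - X13 - X3), log_model1, test = "Chisq")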

log_model1 <- update(log_model1, .~.-X13-X3)
summary(log_model1)
## 
## Call:
## glm(formula = Y ~ X12 + X11 + X2 + X9, family = "binomial", data = df_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4885  -0.5054  -0.2375   0.6710   2.6767  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -3.5542     0.5039  -7.053 1.75e-12 ***
## X121          1.9128     0.4451   4.298 1.73e-05 ***
## X122          3.3587     0.6794   4.944 7.67e-07 ***
## X123          3.2887     0.8308   3.958 7.54e-05 ***
## X111          1.5609     0.3962   3.939 8.17e-05 ***
## X112          1.4668     0.6540   2.243 0.024910 *  
## X21           1.4570     0.4046   3.601 0.000317 ***
## X91           1.6735     0.3967   4.219 2.46e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 325.91  on 236  degrees of freedom
## Residual deviance: 197.72  on 229  degrees of freedom
## AIC: 213.72
## 
## Number of Fisher Scoring iterations: 5

Model 2

For the second model we use all the predictors whose class is factor rather than numeric.

log_model2 <- glm(formula = Y ~ X2 + X3 + X6 + X7 + X9 + X11 + X12 + X13, family = "binomial", 
                  data = df_train)
summary(log_model2)
## 
## Call:
## glm(formula = Y ~ X2 + X3 + X6 + X7 + X9 + X11 + X12 + X13, family = "binomial", 
##     data = df_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8845  -0.4989  -0.1635   0.3719   2.7353  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -5.0358     1.0234  -4.921 8.62e-07 ***
## X21           1.1388     0.5236   2.175 0.029648 *  
## X31           1.0714     0.8579   1.249 0.211711    
## X32           0.1802     0.7949   0.227 0.820628    
## X33           1.8761     0.7527   2.493 0.012684 *  
## X61          -0.5911     0.6532  -0.905 0.365499    
## X71           0.5302     3.0879   0.172 0.863659    
## X72           0.3293     0.4134   0.797 0.425719    
## X91           0.9108     0.4732   1.925 0.054251 .  
## X111          1.5079     0.4856   3.105 0.001902 ** 
## X112          1.4519     0.7994   1.816 0.069339 .  
## X121          1.9804     0.5209   3.802 0.000144 ***
## X122          3.4682     0.8588   4.039 5.38e-05 ***
## X123          3.2147     1.0369   3.100 0.001933 ** 
## X131          0.3684     0.8651   0.426 0.670202    
## X132          1.7662     0.4641   3.806 0.000141 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 325.91  on 236  degrees of freedom
## Residual deviance: 161.22  on 221  degrees of freedom
## AIC: 193.22
## 
## Number of Fisher Scoring iterations: 6

Model 3

Finally, we once again use all predictor variables, this time modelled with the stepwise AIC method. Note that this model is fitted on the full df rather than df_train, and that X4, X5, X8, and X10 are factors with dozens of levels each, so the model ends up with more parameters than observations; the saturated fit and the 49 singular coefficients in the output below are a direct consequence.

log_model3 <- glm(formula = Y ~ ., family = "binomial", data = df)
summary(log_model3)
## 
## Call:
## glm(formula = Y ~ ., family = "binomial", data = df)
## 
## Deviance Residuals: 
##   [1]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [26]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [51]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [76]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [101]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [126]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [151]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [176]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [201]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [226]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [251]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [276]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## 
## Coefficients: (49 not defined because of singularities)
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)  2.454e+03  2.018e+08       0        1
## X1           3.511e+00  2.924e+05       0        1
## X21          2.828e+02  2.199e+07       0        1
## X31         -7.631e+02  6.282e+07       0        1
## X32          1.182e+03  1.016e+08       0        1
## X33         -3.257e+02  2.810e+07       0        1
## X4100       -7.339e+03  6.270e+08       0        1
## X4101       -4.529e+03  3.858e+08       0        1
## X4102       -1.698e+03  1.557e+08       0        1
## X4104        6.716e+02  7.904e+07       0        1
## X4105       -2.489e+03  2.076e+08       0        1
## X4106       -3.112e+03  2.686e+08       0        1
## X4108       -4.329e+03  3.652e+08       0        1
## X4110       -3.179e+03  2.677e+08       0        1
## X4112       -6.624e+03  5.524e+08       0        1
## X4114        4.626e+02  3.204e+07       0        1
## X4115       -2.619e+03  1.973e+08       0        1
## X4117       -7.999e+03  6.607e+08       0        1
## X4118       -5.152e+03  4.360e+08       0        1
## X4120       -4.405e+03  3.629e+08       0        1
## X4122       -4.714e+03  4.107e+08       0        1
## X4123       -4.666e+03  3.888e+08       0        1
## X4124       -4.138e+03  3.383e+08       0        1
## X4125       -5.285e+03  4.401e+08       0        1
## X4126       -2.360e+03  2.173e+08       0        1
## X4128       -2.347e+03  1.940e+08       0        1
## X4129        1.658e+03  1.528e+08       0        1
## X4130       -2.957e+03  2.404e+08       0        1
## X4132        4.500e+03  3.986e+08       0        1
## X4134       -2.400e+03  2.012e+08       0        1
## X4135       -5.528e+03  4.607e+08       0        1
## X4136       -2.118e+03  1.682e+08       0        1
## X4138       -2.883e+03  2.375e+08       0        1
## X4140       -3.271e+03  2.730e+08       0        1
## X4142       -4.187e+03  3.428e+08       0        1
## X4144       -4.426e+03  3.670e+08       0        1
## X4145       -2.106e+03  1.776e+08       0        1
## X4146        6.247e+03  5.145e+08       0        1
## X4148       -5.138e+03  4.330e+08       0        1
## X4150       -3.160e+03  2.654e+08       0        1
## X4152       -3.164e+03  2.664e+08       0        1
## X4154       -2.047e+03  1.783e+08       0        1
## X4155       -1.263e+03  9.830e+07       0        1
## X4156        3.857e+02  3.600e+07       0        1
## X4158       -2.261e+03  1.850e+08       0        1
## X4160       -5.392e+03  4.505e+08       0        1
## X4164       -5.237e+03  4.403e+08       0        1
## X4165       -1.182e+04  9.918e+08       0        1
## X4170        2.251e+03  1.991e+08       0        1
## X4172       -4.040e+03  3.422e+08       0        1
## X4174       -7.856e+03  6.569e+08       0        1
## X4178        8.977e+01  9.282e+06       0        1
## X4180        8.497e+01  1.831e+07       0        1
## X4192       -5.661e+02  4.026e+07       0        1
## X4200        2.893e+03  2.752e+08       0        1
## X5131        1.850e+03  1.553e+08       0        1
## X5141        1.789e+03  1.567e+08       0        1
## X5149        5.220e+03  4.383e+08       0        1
## X5157        3.131e+03  2.577e+08       0        1
## X5160       -1.656e+03  1.484e+08       0        1
## X5164        6.788e+03  5.645e+08       0        1
## X5166        9.899e+02  8.037e+07       0        1
## X5167        7.835e+02  6.083e+07       0        1
## X5168        6.749e+02  5.990e+07       0        1
## X5169        3.994e+03  3.259e+08       0        1
## X5172        2.847e+03  2.405e+08       0        1
## X5174       -2.351e+02  1.770e+07       0        1
## X5175       -5.930e+02  4.033e+07       0        1
## X5176               NA         NA      NA       NA
## X5177        9.836e+02  8.041e+07       0        1
## X5178        1.207e+03  1.008e+08       0        1
## X5180       -1.502e+02  1.853e+07       0        1
## X5182        4.345e+03  3.735e+08       0        1
## X5183        5.726e+02  5.237e+07       0        1
## X5184       -1.402e+01  1.335e+07       0        1
## X5185        3.595e+03  2.968e+08       0        1
## X5186        2.622e+03  2.319e+08       0        1
## X5187        3.209e+03  2.652e+08       0        1
## X5188        3.409e+02  2.345e+07       0        1
## X5192        4.251e+03  3.760e+08       0        1
## X5193        2.876e+03  2.366e+08       0        1
## X5195        4.073e+03  3.440e+08       0        1
## X5196       -4.546e+03  3.869e+08       0        1
## X5197        4.459e+03  3.649e+08       0        1
## X5198        2.450e+03  2.131e+08       0        1
## X5199       -1.059e+03  8.650e+07       0        1
## X5200        1.488e+03  1.141e+08       0        1
## X5201        2.772e+03  2.373e+08       0        1
## X5203        4.683e+03  3.915e+08       0        1
## X5204        1.271e+03  1.058e+08       0        1
## X5205        3.954e+03  3.536e+08       0        1
## X5206        1.368e+03  1.068e+08       0        1
## X5207       -6.026e+03  5.133e+08       0        1
## X5208        1.543e+02  4.948e+06       0        1
## X5209        2.997e+03  2.524e+08       0        1
## X5210        3.315e+03  2.872e+08       0        1
## X5211        2.734e+03  2.315e+08       0        1
## X5212        1.213e+03  1.018e+08       0        1
## X5213        2.146e+03  1.887e+08       0        1
## X5214        1.107e+03  9.433e+07       0        1
## X5215        2.022e+03  1.622e+08       0        1
## X5216        2.972e+03  2.401e+08       0        1
## X5217       -4.141e+02  3.634e+07       0        1
## X5218       -1.474e+03  1.035e+08       0        1
## X5219        2.348e+03  1.907e+08       0        1
## X5220        3.659e+03  3.026e+08       0        1
## X5221        2.409e+03  2.037e+08       0        1
## X5222        3.929e+03  3.351e+08       0        1
## X5223        1.859e+03  1.676e+08       0        1
## X5224       -5.778e+03  5.011e+08       0        1
## X5225       -1.657e+03  1.521e+08       0        1
## X5226        8.719e+02  6.893e+07       0        1
## X5227       -4.590e+02  4.297e+07       0        1
## X5228       -3.266e+03  2.783e+08       0        1
## X5229       -4.422e+02  3.499e+07       0        1
## X5230        5.874e+03  4.891e+08       0        1
## X5231        1.721e+03  1.369e+08       0        1
## X5232        2.371e+03  2.032e+08       0        1
## X5233        7.887e+02  7.120e+07       0        1
## X5234        2.775e+03  2.307e+08       0        1
## X5235       -1.098e+03  8.472e+07       0        1
## X5236        3.169e+03  2.560e+08       0        1
## X5237       -1.542e+03  1.363e+08       0        1
## X5239        2.835e+03  2.382e+08       0        1
## X5240        2.775e+03  2.298e+08       0        1
## X5241       -1.424e+03  1.265e+08       0        1
## X5242        4.385e+03  3.730e+08       0        1
## X5243        4.849e+03  4.059e+08       0        1
## X5244        3.604e+03  3.020e+08       0        1
## X5245       -1.164e+03  1.043e+08       0        1
## X5246        1.910e+03  1.613e+08       0        1
## X5247        5.903e+03  5.002e+08       0        1
## X5248        3.115e+03  2.657e+08       0        1
## X5249        5.888e+03  4.853e+08       0        1
## X5250        5.275e+03  4.434e+08       0        1
## X5252        2.721e+03  2.243e+08       0        1
## X5253        1.587e+03  1.297e+08       0        1
## X5254        2.480e+03  2.023e+08       0        1
## X5255        1.214e+03  9.631e+07       0        1
## X5256        1.362e+03  1.074e+08       0        1
## X5257        1.014e+03  6.491e+07       0        1
## X5258        3.203e+03  2.661e+08       0        1
## X5259        7.271e+02  7.233e+07       0        1
## X5260        4.390e+02  2.342e+07       0        1
## X5261        1.766e+03  1.377e+08       0        1
## X5262        4.774e+03  3.953e+08       0        1
## X5263        4.653e+03  3.942e+08       0        1
## X5264        1.985e+03  1.635e+08       0        1
## X5265        1.211e+03  1.153e+08       0        1
## X5266        6.469e+03  5.404e+08       0        1
## X5267        8.933e+02  8.095e+07       0        1
## X5268        3.185e+03  2.635e+08       0        1
## X5269        1.862e+03  1.583e+08       0        1
## X5270        6.557e+02  5.643e+07       0        1
## X5271        1.524e+03  1.271e+08       0        1
## X5273        3.891e+03  3.245e+08       0        1
## X5274       -3.258e+03  2.865e+08       0        1
## X5275       -2.557e+03  2.272e+08       0        1
## X5276        1.954e+03  1.736e+08       0        1
## X5277       -7.278e+01  6.536e+06       0        1
## X5278       -5.148e+03  4.226e+08       0        1
## X5281        2.517e+02  2.197e+07       0        1
## X5282        1.384e+03  1.189e+08       0        1
## X5283       -1.155e+03  1.017e+08       0        1
## X5284        4.488e+03  3.673e+08       0        1
## X5286        3.536e+03  2.858e+08       0        1
## X5288       -5.219e+03  4.675e+08       0        1
## X5289        6.595e+03  5.521e+08       0        1
## X5290        2.369e+03  1.993e+08       0        1
## X5293        2.876e+03  2.370e+08       0        1
## X5294       -3.591e+02  2.728e+07       0        1
## X5295        3.272e+03  2.688e+08       0        1
## X5298        1.044e+03  8.218e+07       0        1
## X5299        2.399e+03  2.086e+08       0        1
## X5300        8.444e+03  7.053e+08       0        1
## X5302        4.341e+03  3.693e+08       0        1
## X5303        1.624e+03  1.266e+08       0        1
## X5304        3.350e+03  2.769e+08       0        1
## X5305        2.509e+03  2.006e+08       0        1
## X5306        1.731e+03  1.723e+08       0        1
## X5307       -9.047e+02  8.217e+07       0        1
## X5308        2.014e+03  1.681e+08       0        1
## X5309        3.965e+03  3.201e+08       0        1
## X5311       -5.773e+02  4.534e+07       0        1
## X5313       -2.299e+03  1.993e+08       0        1
## X5315       -1.248e+02  1.858e+07       0        1
## X5318       -3.586e+02  1.820e+07       0        1
## X5319        5.508e+03  4.486e+08       0        1
## X5321        4.755e+02  3.879e+07       0        1
## X5322        5.943e+03  4.967e+08       0        1
## X5325        1.201e+03  8.706e+07       0        1
## X5326       -5.794e+03  5.034e+08       0        1
## X5327        2.590e+02  2.760e+07       0        1
## X5330        1.624e+03  1.278e+08       0        1
## X5335        9.347e+02  7.472e+07       0        1
## X5340        1.200e+03  9.324e+07       0        1
## X5341       -3.426e+03  3.096e+08       0        1
## X5342       -9.044e+03  7.854e+08       0        1
## X5353       -6.113e+03  5.365e+08       0        1
## X5354        2.925e+03  2.518e+08       0        1
## X5360        4.979e+03  4.165e+08       0        1
## X5394        3.628e+03  3.021e+08       0        1
## X5407        5.629e+03  4.644e+08       0        1
## X5409        7.924e+02  6.610e+07       0        1
## X5417        2.777e+03  2.308e+08       0        1
## X5564        9.541e+02  5.679e+07       0        1
## X61         -2.662e+02  2.185e+07       0        1
## X71         -3.350e+03  2.932e+08       0        1
## X72         -4.163e+02  3.474e+07       0        1
## X888         3.501e+03  3.108e+08       0        1
## X890                NA         NA      NA       NA
## X895                NA         NA      NA       NA
## X896        -4.085e+02  3.683e+07       0        1
## X897        -5.148e+03  4.461e+08       0        1
## X899         1.993e+02  9.609e+06       0        1
## X8103        1.503e+03  1.278e+08       0        1
## X8105       -7.362e+03  6.236e+08       0        1
## X8106        1.188e+03  9.463e+07       0        1
## X8108       -1.058e+03  7.466e+07       0        1
## X8109       -5.200e+03  4.376e+08       0        1
## X8111        3.971e+02  3.872e+07       0        1
## X8112       -1.319e+03  1.124e+08       0        1
## X8113        5.878e+02  5.918e+07       0        1
## X8114       -4.241e+02  2.457e+07       0        1
## X8115       -1.814e+03  1.570e+08       0        1
## X8116       -1.691e+03  1.108e+08       0        1
## X8117               NA         NA      NA       NA
## X8118       -4.355e+02  2.300e+07       0        1
## X8120        5.920e+02  5.024e+07       0        1
## X8121       -1.412e+03  1.304e+08       0        1
## X8122       -1.047e+03  7.857e+07       0        1
## X8123               NA         NA      NA       NA
## X8124               NA         NA      NA       NA
## X8125       -1.000e+03  8.025e+07       0        1
## X8126       -2.942e+03  2.407e+08       0        1
## X8127        8.504e+02  7.858e+07       0        1
## X8128       -3.832e+03  3.161e+08       0        1
## X8129        1.934e+03  1.584e+08       0        1
## X8130       -1.262e+03  1.150e+08       0        1
## X8131       -3.508e+03  2.828e+08       0        1
## X8132       -1.301e+03  1.037e+08       0        1
## X8133               NA         NA      NA       NA
## X8134               NA         NA      NA       NA
## X8136       -3.355e+03  2.775e+08       0        1
## X8137       -9.620e+03  8.047e+08       0        1
## X8138        6.404e+03  5.487e+08       0        1
## X8139               NA         NA      NA       NA
## X8140        6.570e+02  6.734e+07       0        1
## X8141       -8.075e+02  6.322e+07       0        1
## X8142       -2.168e+03  1.785e+08       0        1
## X8143       -1.051e+03  7.811e+07       0        1
## X8144       -2.838e+03  2.303e+08       0        1
## X8145       -3.300e+03  2.710e+08       0        1
## X8146       -2.928e+03  2.399e+08       0        1
## X8147       -2.615e+03  2.102e+08       0        1
## X8148       -3.716e+03  3.159e+08       0        1
## X8149       -4.082e+03  3.394e+08       0        1
## X8150       -9.856e+02  7.896e+07       0        1
## X8151       -2.287e+03  1.871e+08       0        1
## X8152       -4.577e+03  3.797e+08       0        1
## X8153        2.132e+03  1.794e+08       0        1
## X8154       -4.376e+03  3.563e+08       0        1
## X8155       -4.011e+03  3.319e+08       0        1
## X8156       -1.826e+03  1.316e+08       0        1
## X8157       -1.939e+03  1.571e+08       0        1
## X8158       -1.781e+03  1.456e+08       0        1
## X8159       -1.216e+03  8.812e+07       0        1
## X8160       -1.817e+03  1.475e+08       0        1
## X8161       -2.144e+03  1.719e+08       0        1
## X8162       -1.015e+03  7.905e+07       0        1
## X8163       -1.232e+03  1.058e+08       0        1
## X8164       -2.055e+03  1.677e+08       0        1
## X8165       -2.930e+03  2.441e+08       0        1
## X8166        3.007e+03  2.609e+08       0        1
## X8167               NA         NA      NA       NA
## X8168       -1.823e+03  1.564e+08       0        1
## X8169       -5.666e+02  4.407e+07       0        1
## X8170       -1.409e+03  1.134e+08       0        1
## X8171       -5.331e+03  4.430e+08       0        1
## X8172       -6.616e+02  5.053e+07       0        1
## X8173       -2.402e+03  2.050e+08       0        1
## X8174       -1.660e+03  1.364e+08       0        1
## X8175       -9.239e+02  7.540e+07       0        1
## X8177       -3.404e+03  2.709e+08       0        1
## X8178       -1.860e+02  4.298e+06       0        1
## X8179       -2.740e+03  2.283e+08       0        1
## X8180        6.858e+02  5.427e+07       0        1
## X8181       -1.220e+03  1.085e+08       0        1
## X8182       -8.704e+02  6.771e+07       0        1
## X8184       -3.523e+03  3.138e+08       0        1
## X8185               NA         NA      NA       NA
## X8186       -1.406e+03  9.944e+07       0        1
## X8187       -6.393e+03  5.391e+08       0        1
## X8188       -1.129e+03  8.930e+07       0        1
## X8190               NA         NA      NA       NA
## X8192               NA         NA      NA       NA
## X8194       -1.421e+03  1.161e+08       0        1
## X8195               NA         NA      NA       NA
## X8202               NA         NA      NA       NA
## X91          7.103e+02  5.889e+07       0        1
## X100.1       3.801e+03  3.153e+08       0        1
## X100.2       1.683e+03  1.503e+08       0        1
## X100.3       1.032e+03  8.642e+07       0        1
## X100.4       9.859e+01  1.850e+06       0        1
## X100.5       2.252e+03  1.891e+08       0        1
## X100.6      -3.531e+02  3.224e+07       0        1
## X100.7              NA         NA      NA       NA
## X100.8      -7.749e+02  6.643e+07       0        1
## X100.9       3.025e+03  2.594e+08       0        1
## X101         3.327e+03  2.840e+08       0        1
## X101.1      -7.753e+01  1.369e+07       0        1
## X101.2      -3.745e+02  2.902e+07       0        1
## X101.3              NA         NA      NA       NA
## X101.4       9.022e+02  6.901e+07       0        1
## X101.5              NA         NA      NA       NA
## X101.6              NA         NA      NA       NA
## X101.8              NA         NA      NA       NA
## X101.9              NA         NA      NA       NA
## X102                NA         NA      NA       NA
## X102.1              NA         NA      NA       NA
## X102.2              NA         NA      NA       NA
## X102.3              NA         NA      NA       NA
## X102.4              NA         NA      NA       NA
## X102.5              NA         NA      NA       NA
## X102.6              NA         NA      NA       NA
## X102.8              NA         NA      NA       NA
## X102.9              NA         NA      NA       NA
## X103                NA         NA      NA       NA
## X103.1              NA         NA      NA       NA
## X103.2              NA         NA      NA       NA
## X103.4              NA         NA      NA       NA
## X103.5              NA         NA      NA       NA
## X103.6              NA         NA      NA       NA
## X103.8              NA         NA      NA       NA
## X104                NA         NA      NA       NA
## X104.2              NA         NA      NA       NA
## X104.4              NA         NA      NA       NA
## X105.6              NA         NA      NA       NA
## X106.2              NA         NA      NA       NA
## X111                NA         NA      NA       NA
## X112                NA         NA      NA       NA
## X121                NA         NA      NA       NA
## X122                NA         NA      NA       NA
## X123                NA         NA      NA       NA
## X131                NA         NA      NA       NA
## X132                NA         NA      NA       NA
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4.0995e+02  on 296  degrees of freedom
## Residual deviance: 1.7231e-09  on   0  degrees of freedom
## AIC: 594
## 
## Number of Fisher Scoring iterations: 25

Model 2 using Stepwise Method

log_back_model2 <- stepAIC(log_model2, direction = "backward", trace = F)
summary(log_back_model2)
## 
## Call:
## glm(formula = Y ~ X2 + X3 + X9 + X11 + X12 + X13, family = "binomial", 
##     data = df_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9298  -0.5220  -0.1965   0.3894   2.7542  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -4.9024     0.9794  -5.006 5.57e-07 ***
## X21           1.1324     0.5206   2.175 0.029609 *  
## X31           1.0512     0.8462   1.242 0.214164    
## X32           0.1090     0.7864   0.139 0.889781    
## X33           1.9250     0.7465   2.579 0.009915 ** 
## X91           0.8554     0.4694   1.822 0.068421 .  
## X111          1.5227     0.4813   3.164 0.001557 ** 
## X112          1.5153     0.7935   1.910 0.056196 .  
## X121          2.0084     0.5167   3.887 0.000102 ***
## X122          3.2813     0.8148   4.027 5.65e-05 ***
## X123          3.1016     1.0024   3.094 0.001975 ** 
## X131          0.2286     0.8439   0.271 0.786454    
## X132          1.7365     0.4585   3.788 0.000152 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 325.91  on 236  degrees of freedom
## Residual deviance: 162.65  on 224  degrees of freedom
## AIC: 188.65
## 
## Number of Fisher Scoring iterations: 6

Model 3 using Stepwise Method

As before, we apply the stepwise method to obtain the best combination of predictor variables; after the stepwise pass, predictors without a significant effect are eliminated.

log_back_model3 <- stepAIC(log_model3, direction = "backward", trace = F)
summary(log_back_model3)
## 
## Call:
## glm(formula = Y ~ X10, family = "binomial", data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8930  -0.7948  -0.5168   0.8576   2.0393  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.99040    0.22967  -4.312 1.62e-05 ***
## X100.1         0.29725    0.89596   0.332  0.74006    
## X100.2        -0.10821    0.70512  -0.153  0.87803    
## X100.3         0.29725    1.24609   0.239  0.81146    
## X100.4        -0.95551    1.09344  -0.874  0.38219    
## X100.5        -0.39590    1.14138  -0.347  0.72870    
## X100.6         0.07411    0.63462   0.117  0.90704    
## X100.7       -16.57567 3956.18033  -0.004  0.99666    
## X100.8         0.83625    0.60189   1.389  0.16472    
## X100.9         1.68355    1.24609   1.351  0.17668    
## X101           1.80133    0.64332   2.800  0.00511 ** 
## X101.1       -16.57567 2797.44195  -0.006  0.99527    
## X101.2         1.34707    0.54370   2.478  0.01323 *  
## X101.3       -16.57567 3956.18033  -0.004  0.99666    
## X101.4         1.14455    0.60189   1.902  0.05722 .  
## X101.5        -0.39590    1.14138  -0.347  0.72870    
## X101.6         0.43078    0.66754   0.645  0.51871    
## X101.8         1.83770    0.72728   2.527  0.01151 *  
## X101.9         1.39586    0.94132   1.483  0.13811    
## X102           2.24316    0.83403   2.690  0.00715 ** 
## X102.1        18.55647 3956.18033   0.005  0.99626    
## X102.2        18.55647 1978.09018   0.009  0.99252    
## X102.3       -16.57567 2797.44195  -0.006  0.99527    
## X102.4         1.68355    1.24609   1.351  0.17668    
## X102.5        18.55647 2797.44195   0.007  0.99471    
## X102.6         2.59984    1.11926   2.323  0.02019 *  
## X102.8        18.55647 1615.10387   0.011  0.99083    
## X102.9        18.55647 3956.18033   0.005  0.99626    
## X103           2.37669    1.14138   2.082  0.03732 *  
## X103.1        18.55647 3956.18033   0.005  0.99626    
## X103.2        18.55647 2797.44195   0.007  0.99471    
## X103.4        18.55647 2284.10179   0.008  0.99352    
## X103.5       -16.57567 3956.18033  -0.004  0.99666    
## X103.6        18.55647 1978.09018   0.009  0.99252    
## X103.8        18.55647 3956.18033   0.005  0.99626    
## X104          18.55647 2284.10179   0.008  0.99352    
## X104.2         0.99040    1.43274   0.691  0.48940    
## X104.4        18.55647 3956.18034   0.005  0.99626    
## X105.6        18.55647 3956.18033   0.005  0.99626    
## X106.2        18.55647 3956.18034   0.005  0.99626    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 409.95  on 296  degrees of freedom
## Residual deviance: 308.58  on 257  degrees of freedom
## AIC: 388.58
## 
## Number of Fisher Scoring iterations: 16

From the three models above, we now eliminate the ones that are relatively worse than the others. First we identify the worst AIC value.

log_model1$aic
## [1] 213.7179
log_back_model2$aic
## [1] 188.6541
log_back_model3$aic
## [1] 388.5827

The worst AIC is that of model 3. Besides AIC, we should also verify model fit through the residual deviance statistic, which under the null hypothesis that the model fits the data adequately follows a χ² distribution on the residual degrees of freedom. The calls below compute the lower-tail probability P(χ² ≤ deviance), so a value close to 1 means the deviance is implausibly large for the degrees of freedom, i.e. lack of fit.

pchisq(log_model1$deviance, df = log_model1$df.residual, lower.tail = T)
## [1] 0.0664693
# Note: the two calls below reuse model 1's residual df; strictly, each model
# should be tested against its own df.residual (see the sketch after the next paragraph).
pchisq(log_back_model2$deviance, df = log_model1$df.residual, lower.tail = T)
## [1] 0.0003003839
pchisq(log_back_model3$deviance, df = log_model1$df.residual, lower.tail = T)
## [1] 0.999649

Model 3's lower-tail probability is about 0.9996, i.e. its upper-tail p-value is below 0.05, indicating significant lack of fit. Weighing in its relatively large AIC as well, we discard model 3.
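
For reference, the conventional form of this test uses the upper tail and each model's own residual degrees of freedom (a minimal sketch; the calls above reuse model 1's df for all three models):

# Sketch: upper-tail goodness-of-fit p-values, each model against its own df.
pchisq(log_model1$deviance, df = log_model1$df.residual, lower.tail = FALSE)
pchisq(log_back_model2$deviance, df = log_back_model2$df.residual, lower.tail = FALSE)
pchisq(log_back_model3$deviance, df = log_back_model3$df.residual, lower.tail = FALSE)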

After the deviance check, we test for multicollinearity to see whether the predictor variables are strongly related to one another.

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:gtools':
## 
##     logit
## The following object is masked from 'package:dplyr':
## 
##     recode
vif(log_model1)
##         GVIF Df GVIF^(1/(2*Df))
## X12 1.171791  3        1.026774
## X11 1.142280  2        1.033816
## X2  1.089833  1        1.043951
## X9  1.062685  1        1.030866

There is no multicollinearity in model 1: all GVIF^(1/(2*Df)) values are close to 1, well below the usual cutoff.

vif(log_back_model2)
##         GVIF Df GVIF^(1/(2*Df))
## X2  1.382042  1        1.175603
## X3  1.536971  3        1.074264
## X9  1.186972  1        1.089483
## X11 1.386448  2        1.085115
## X12 1.334434  3        1.049259
## X13 1.373315  2        1.082536

Likewise, there is no multicollinearity in model 2: all GVIF^(1/(2*Df)) values remain close to 1.
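
A quick programmatic check (a minimal sketch; the cutoff of 2 on the GVIF^(1/(2*Df)) scale is one common rule of thumb, not a threshold fixed by vif() itself):

# Sketch: flag predictors whose adjusted GVIF exceeds a cutoff of 2 (none here).
v <- vif(log_back_model2)
v[v[, "GVIF^(1/(2*Df))"] > 2, , drop = FALSE]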

library(hnp)
set.seed(305)
hnp(log_model1, halfnormal = F, paint.out = T, pch = 16, sim = 500, print.on = T)
## Binomial model

hnp(log_back_model2, halfnormal = F, paint.out = T, pch = 16, sim = 500, print.on = T)
## Binomial model

Prediction

Next, we make predictions to see how well the models we have built fit new data. First we apply model 1 to the test set. With type = "response", predict() returns a number between 0 and 1: the predicted probability that an observation belongs to the positive class.

df_test$prob1 <- predict(object = log_model1, type = "response", newdata = df_test)
df_test$pred.label1 <- ifelse(df_test$prob1 > 0.5, yes = "1", no = "0")
df_test$Y_type <- ifelse(df_test$Y == "0", 0, 1)  # Y has levels "0"/"1"

To evaluate how well the model classifies patients as having heart disease or not, we use the ROC curve. Model 1 achieves an area under the curve of about 0.89, which is undoubtedly very good.

library(ROCR)

pred_model1 <- prediction(df_test$prob1, df_test$Y_type)
perf <- performance(pred_model1, "tpr", "fpr")
plot(perf, colorize = TRUE, main = "ROC Curve")
abline(0, 1, lty = 2)

performance(pred_model1,"auc")@y.values[[1]] # area under the curve (AUC)
## [1] 0.8926585
df_test$prob2 <- predict(object = log_back_model2, type = "response", newdata = df_test)
df_test$pred.label2 <- ifelse(test = df_test$prob2 > 0.5, yes = "1", no = "0")

As with model 1, model 2 achieves a very good area under the curve, here about 0.93.

pred_model2 <- prediction(df_test$prob2, df_test$Y_type)
perf <- performance(pred_model2,"tpr", "fpr")
plot(perf, colorize = TRUE, main="ROC Curve")
abline(0, 1, lty = 2)

performance(pred_model2,"auc")@y.values[[1]] # area under the curve (AUC)
## [1] 0.932703
library(ggplot2)
ggplot(df_test, aes(x = prob1)) +
  geom_density(lwd = 0.5) +
  theme_minimal()

From the plot above, the predicted probabilities of model 1 are spread roughly evenly between the two target classes; that is, the "not sick" and "sick" predictions are nearly balanced.

ggplot(df_test, aes(x = prob2)) +
  geom_density(lwd = 0.5) +
  theme_minimal()

From the plot above, model 2's predictions lean slightly toward 1, i.e. toward the "sick" class. The predictions produced by the two models follow below.
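
To make those claims about the two classes explicit, one could colour the densities by the true label (a minimal sketch; prob2 and Y_type were created above):

# Sketch: model 2's predicted probabilities, split by the true class.
ggplot(df_test, aes(x = prob2, fill = factor(Y_type))) +
  geom_density(alpha = 0.3) +
  labs(fill = "true class") +
  theme_minimal()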

library(dplyr)
# Note: sample() on a data frame permutes its *columns* (hence the shuffled
# column order in the output below); it does not draw a random sample of rows.
df_test[,c("pred.label1", "Y", "pred.label2")] %>% sample()
##     pred.label2 Y pred.label1
## 2             0 0           0
## 3             0 0           0
## 4             0 1           1
## 8             1 1           1
## 9             0 0           0
## 18            0 0           1
## 19            0 1           0
## 20            0 0           1
## 26            0 0           0
## 41            0 0           0
## 44            0 0           0
## 45            0 0           1
## 64            0 0           0
## 65            0 0           0
## 72            0 0           0
## 91            1 0           1
## 94            0 1           0
## 99            1 1           1
## 101           1 1           1
## 103           0 0           0
## 105           0 0           0
## 106           1 0           0
## 107           1 1           1
## 110           0 0           0
## 114           0 0           0
## 115           0 0           0
## 128           0 0           0
## 140           0 0           0
## 143           0 0           0
## 145           0 0           0
## 147           0 0           0
## 155           0 0           0
## 161           1 1           1
## 163           1 1           1
## 166           1 0           0
## 173           0 0           0
## 180           1 1           1
## 185           1 1           1
## 193           1 1           1
## 195           1 1           1
## 196           0 1           0
## 201           1 1           1
## 215           1 1           1
## 217           1 1           1
## 221           1 1           0
## 225           1 1           1
## 227           1 1           1
## 229           1 1           1
## 236           1 1           1
## 243           0 1           0
## 248           1 1           1
## 249           1 1           1
## 258           1 1           0
## 261           1 1           1
## 264           1 1           0
## 274           1 1           1
## 277           0 0           0
## 278           1 1           1
## 290           0 0           0
## 297           1 1           0
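
To inspect a few random rows rather than shuffling the columns, a minimal sketch using dplyr:

# Sketch: show 10 random rows of the prediction columns.
df_test[, c("pred.label1", "Y", "pred.label2")] %>% slice_sample(n = 10)
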
library(caret)
## Loading required package: lattice
model_eval1 <- confusionMatrix(as.factor(df_test$pred.label1), as.factor(df_test$Y), positive = "1")
model_eval1
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 25  8
##          1  4 23
##                                           
##                Accuracy : 0.8             
##                  95% CI : (0.6767, 0.8922)
##     No Information Rate : 0.5167          
##     P-Value [Acc > NIR] : 5.222e-06       
##                                           
##                   Kappa : 0.6013          
##                                           
##  Mcnemar's Test P-Value : 0.3865          
##                                           
##             Sensitivity : 0.7419          
##             Specificity : 0.8621          
##          Pos Pred Value : 0.8519          
##          Neg Pred Value : 0.7576          
##              Prevalence : 0.5167          
##          Detection Rate : 0.3833          
##    Detection Prevalence : 0.4500          
##       Balanced Accuracy : 0.8020          
##                                           
##        'Positive' Class : 1               
## 
model_eval2 <- confusionMatrix(as.factor(df_test$pred.label2), as.factor(df_test$Y), positive = "1")
model_eval2
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 26  5
##          1  3 26
##                                           
##                Accuracy : 0.8667          
##                  95% CI : (0.7541, 0.9406)
##     No Information Rate : 0.5167          
##     P-Value [Acc > NIR] : 1.105e-08       
##                                           
##                   Kappa : 0.7336          
##                                           
##  Mcnemar's Test P-Value : 0.7237          
##                                           
##             Sensitivity : 0.8387          
##             Specificity : 0.8966          
##          Pos Pred Value : 0.8966          
##          Neg Pred Value : 0.8387          
##              Prevalence : 0.5167          
##          Detection Rate : 0.4333          
##    Detection Prevalence : 0.4833          
##       Balanced Accuracy : 0.8676          
##                                           
##        'Positive' Class : 1               
## 
model_eval1$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.7419355            0.8620690            0.8518519 
##       Neg Pred Value            Precision               Recall 
##            0.7575758            0.8518519            0.7419355 
##                   F1           Prevalence       Detection Rate 
##            0.7931034            0.5166667            0.3833333 
## Detection Prevalence    Balanced Accuracy 
##            0.4500000            0.8020022
Recall <- 74.19
Specificity <- 86.20
precision <- 85.18
Accuracy <- 80.20  # note: 80.20 is the balanced accuracy; the raw accuracy is 80.00
performance1 <- cbind.data.frame(Accuracy, Recall, precision, Specificity)
performance1
##   Accuracy Recall precision Specificity
## 1     80.2  74.19     85.18        86.2
model_eval2$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8387097            0.8965517            0.8965517 
##       Neg Pred Value            Precision               Recall 
##            0.8387097            0.8965517            0.8387097 
##                   F1           Prevalence       Detection Rate 
##            0.8666667            0.5166667            0.4333333 
## Detection Prevalence    Balanced Accuracy 
##            0.4833333            0.8676307
Recall <- 83.87
Specificity <- 89.65
precision <- 89.65
Accuracy <- 86.80  # note: the raw accuracy is 86.67 (balanced accuracy 86.76); 86.80 appears to be a transcription slip
performance2 <- cbind.data.frame(Accuracy, Recall, precision, Specificity)
performance2
##   Accuracy Recall precision Specificity
## 1     86.8  83.87     89.65       89.65

From the confusion matrices above, model 2 is better at telling healthy from non-healthy patients, with an overall accuracy of about 86.7%. Among patients who are actually sick, model 2 is also better, correctly identifying 83.87% of them (recall/sensitivity). Among healthy patients, it is correct for 89.65% (specificity). And of all the positive predictions the model makes, 89.65% are correct (precision).
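
Retyping the percentages by hand invites the kind of slip noted above; a minimal sketch pulling both models' metrics straight from the caret objects:

# Sketch: assemble the comparison table directly from caret's byClass vectors.
metrics <- c("Balanced Accuracy", "Recall", "Precision", "Specificity")
round(rbind(model1 = model_eval1$byClass[metrics],
            model2 = model_eval2$byClass[metrics]) * 100, 2)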

Model Tuning

performa <- function(cutoff, prob, ref, posY, negY) 
{
  predict <- factor(ifelse(prob >= cutoff, posY, negY))
  conf <- caret::confusionMatrix(predict, ref, positive = posY)
  acc <- conf$overall[1]   # accuracy
  rec <- conf$byClass[1]   # sensitivity / recall
  prec <- conf$byClass[3]  # positive predictive value / precision
  spec <- conf$byClass[2]  # specificity
  mat <- t(as.matrix(c(rec, acc, prec, spec))) 
  colnames(mat) <- c("recall", "accuracy", "precision", "specificity")
  return(mat)
}

co <- seq(0.01, 0.80, length.out = 100)
result <- matrix(0,100,4)

for(i in 1:100){
  result[i,] = performa(cutoff = co[i], 
                     prob = df_test$prob1, 
                     ref = df_test$Y, 
                     posY = "1", 
                     negY = "0")
}

tibble("Recall" = result[,1],
       "Accuracy" = result[,2],
       "Precision" = result[,3],
       "Specificity" = result[,4],
       "Cutoff" = co) %>% 
  gather(key = "performa", value = "value", 1:4) %>% 
  ggplot(aes(x = Cutoff, y = value, col = performa)) +
  geom_line(lwd = 1.5) +
  scale_color_manual(values = c("darkred","darkgreen","orange", "blue")) +
  scale_y_continuous(breaks = seq(0,1,0.1), limits = c(0,1)) +
  scale_x_continuous(breaks = seq(0,1,0.1)) +
  labs(title = "Tradeoff of model performance") +
  theme_minimal() +
  theme(legend.position = "top",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())
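
From this grid one can also extract a working cutoff, for instance the one maximising balanced accuracy (a minimal sketch; which criterion to optimise depends on the relative cost of false negatives versus false positives when screening for heart disease):

# Sketch: cutoff maximising balanced accuracy (mean of recall and specificity).
# result's columns follow performa(): recall, accuracy, precision, specificity.
co[which.max((result[, 1] + result[, 4]) / 2)]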

# Model Interpretation

options(scipen = 999)
exp(log_model1$coefficients) %>% data.frame()
##                       .
## (Intercept)  0.02860413
## X121         6.77220586
## X122        28.75065039
## X123        26.80749971
## X111         4.76306390
## X112         4.33546237
## X21          4.29309161
## X91          5.33056287
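
These exponentiated coefficients are odds ratios; for example, holding the other predictors fixed, observations with X2 = 1 have about 4.3 times the odds of condition = 1. To attach uncertainty, one could add profile-likelihood confidence intervals (a minimal sketch; confint() on a glm profiles the likelihood via MASS, which is already attached):

# Sketch: odds ratios from model 1 with 95% profile-likelihood intervals.
exp(cbind(OR = coef(log_model1), confint(log_model1)))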