Simulating Binary Response Data

set.seed(111)
m   <- 500                          # number of binomial observations
x   <- round(runif(m, -2, 3), 2)    # covariate
b0  <- 1.5                          # true intercept
b1  <- 2.5                          # true slope
y   <- numeric(m)                   # container for the response
n   <- round(runif(m, 5, 30), 0)    # number of trials per observation

# Draw y[i] ~ Binomial(n[i], p[i]) with p[i] the inverse-logit of b0 + b1*x[i]
for (i in 1:m) {
  y[i] <- rbinom(1, n[i], exp(b0 + b1*x[i]) / (1 + exp(b0 + b1*x[i])))
}

y_data <- cbind(y, n - y)           # (successes, failures) matrix for the binomial GLM
p      <- round(y/n, 5)             # observed proportions
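The loop draws each y[i] from Binomial(n[i], p[i]), where p[i] = exp(b0 + b1*x[i]) / (1 + exp(b0 + b1*x[i])) is the inverse-logit of the linear predictor. A minimal vectorized sketch of the same data-generating process, using the base-R inverse-logit plogis() (shown only as an alternative, not the code used for the results below):

# Vectorized alternative: one call generates all m binomial draws
p_true <- plogis(b0 + b1*x)                  # inverse-logit of the linear predictor
y_alt  <- rbinom(m, size = n, prob = p_true)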

Simulated Data

# Display the first 50 of the 500 generated observations
# to inspect the characteristics of the simulated data

dataku <- data.frame(x,y,n,p)
head(dataku,50)

Model Performance Comparison

# Comparing link functions
# when Y truly follows a Binomial distribution

model_logit   <- glm(y_data ~ x, family=binomial(link="logit")) 
model_probit  <- glm(y_data ~ x, family=binomial(link="probit")) 
model_cloglog <- glm(y_data ~ x, family=binomial(link="cloglog")) 

summary(model_logit)
## 
## Call:
## glm(formula = y_data ~ x, family = binomial(link = "logit"))
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.57475    0.05086   30.96   <2e-16 ***
## x            2.55874    0.06109   41.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6088.38  on 499  degrees of freedom
## Residual deviance:  374.71  on 498  degrees of freedom
## AIC: 994.54
## 
## Number of Fisher Scoring iterations: 6
summary(model_probit)
## 
## Call:
## glm(formula = y_data ~ x, family = binomial(link = "probit"))
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.84949    0.02625   32.37   <2e-16 ***
## x            1.39915    0.02979   46.97   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6088.38  on 499  degrees of freedom
## Residual deviance:  403.53  on 498  degrees of freedom
## AIC: 1023.4
## 
## Number of Fisher Scoring iterations: 6
summary(model_cloglog)
## 
## Call:
## glm(formula = y_data ~ x, family = binomial(link = "cloglog"))
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.29645    0.02196   13.50   <2e-16 ***
## x            1.26015    0.02958   42.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6088.38  on 499  degrees of freedom
## Residual deviance:  638.96  on 498  degrees of freedom
## AIC: 1258.8
## 
## Number of Fisher Scoring iterations: 9
# Recap of AIC values

AIC(model_logit, model_probit, model_cloglog)
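Based on the summaries above, the logit link attains the lowest residual deviance (374.71) and AIC (994.54), which is expected because the data were generated through a logistic model; its estimates (1.575, 2.559) are also closest to the true values b0 = 1.5 and b1 = 2.5. As a small optional sketch using only base R, the residual deviance and AIC of the three fits can be tabulated side by side:

# Tabulate residual deviance and AIC for the three link functions
sapply(list(logit = model_logit, probit = model_probit, cloglog = model_cloglog),
       function(mod) c(deviance = deviance(mod), AIC = AIC(mod)))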

Comparing Predictive Performance

# Compute the evaluation metrics

library(caret)
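# The objects data_ind, pred_logit, pred_probit, and pred_cloglog are not created
# anywhere in this section. The lines below are a minimal sketch of how they could
# be obtained: the grouped counts are expanded into individual Bernoulli outcomes
# and each model's fitted probabilities are dichotomized at 0.5. Both the expansion
# and the 0.5 threshold are assumptions, not necessarily the original choices.
data_ind <- data.frame(
  x = rep(x, times = n),
  y = unlist(mapply(function(succ, tot) c(rep(1, succ), rep(0, tot - succ)), y, n))
)
pred_logit   <- as.numeric(predict(model_logit,   newdata = data_ind, type = "response") >= 0.5)
pred_probit  <- as.numeric(predict(model_probit,  newdata = data_ind, type = "response") >= 0.5)
pred_cloglog <- as.numeric(predict(model_cloglog, newdata = data_ind, type = "response") >= 0.5)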

conf_logit   <- confusionMatrix(factor(pred_logit),   factor(data_ind$y), positive = "1")
conf_probit  <- confusionMatrix(factor(pred_probit),  factor(data_ind$y), positive = "1")
conf_cloglog <- confusionMatrix(factor(pred_cloglog), factor(data_ind$y), positive = "1")

hasil <- data.frame(
  Model = c("Logit", "Probit", "Cloglog"),
  Sensitivitas = c(conf_logit$byClass["Sensitivity"],
                   conf_probit$byClass["Sensitivity"],
                   conf_cloglog$byClass["Sensitivity"]),
  Akurasi = c(conf_logit$overall["Accuracy"],
              conf_probit$overall["Accuracy"],
              conf_cloglog$overall["Accuracy"]),
  Presisi = c(conf_logit$byClass["Precision"],
              conf_probit$byClass["Precision"],
              conf_cloglog$byClass["Precision"]),
  F1_Score = c(conf_logit$byClass["F1"],
               conf_probit$byClass["F1"],
               conf_cloglog$byClass["F1"])
)

Summary of Model Performance Evaluation

print(hasil, row.names = FALSE)
##    Model Sensitivitas   Akurasi   Presisi  F1_Score
##    Logit    0.9357043 0.9004304 0.9316530 0.9336743
##   Probit    0.9357043 0.9004304 0.9316530 0.9336743
##  Cloglog    0.9276285 0.9010120 0.9394464 0.9335000
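For this simulated data, all three link functions give essentially the same classification performance (accuracy around 0.90 and F1 around 0.93), so the choice of link matters mainly for goodness of fit and parameter interpretation, where the logit link, matching the true data-generating process, is preferred, rather than for raw predictive accuracy.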