Vignette Title

Vignette Author

2022-11-23

1. Pendahuluan

1.1 Library yang Akan Digunakan

library(rlang)
library(rcompanion)
library(car)
## Loading required package: carData
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

1.2 Impor Data

titanic <- data.frame(read.csv("C:/ICA/Semester 5/PADK/titanic_clean.csv"))
data <- head(titanic)
kableExtra::kable(data, align = rep('c',9))
X Survived Pclass Sex Age SibSp Parch Fare Embarked
1 0 3 male 22.00000 1 0 7.2500 S
2 1 1 female 38.00000 1 0 71.2833 C
3 1 3 female 26.00000 0 0 7.9250 S
4 1 1 female 35.00000 1 0 53.1000 S
5 0 3 male 35.00000 0 0 8.0500 S
6 0 3 male 29.69912 0 0 8.4583 Q

1.3 Praprocessing Data

titanic$sex <- ifelse(titanic$Sex == "male", 1, 0)
titanic$Pclass1 <- ifelse(titanic$Pclass == "1", 1, 0) 
titanic$Pclass2 <- ifelse(titanic$Pclass == "2", 1, 0) 
titanic <- titanic[, c(2,3,5,10:12)]

titanic$Survived <- as.factor(titanic$Survived)
titanic$Pclass1 <- as.factor(titanic$Pclass1)
titanic$Pclass2 <- as.factor(titanic$Pclass2)
titanic$Age <- as.integer(round(titanic$Age, 0))
titanic$sex <- as.integer(titanic$sex)
str(titanic)
## 'data.frame':    889 obs. of  6 variables:
##  $ Survived: Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 1 2 2 ...
##  $ Pclass  : int  3 1 3 1 3 3 1 3 3 2 ...
##  $ Age     : int  22 38 26 35 35 30 54 2 27 14 ...
##  $ sex     : int  1 0 0 0 1 1 1 1 0 0 ...
##  $ Pclass1 : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 2 1 1 1 ...
##  $ Pclass2 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...

1.4 Eksplorasi Data

library(dplyr)
titanic$survived <- as.integer(titanic$Survived)
d <- titanic %>% count(survived)

pie(c(d[1,2],d[2,2]), col = c("#DC3535", "#6ECCAF"), 
    labels = c("Tidak Selamat", "Selamat"), border = "black",
    main = "Status Penumpang Titanic")

library(ggplot2)
ggplot(titanic) +
  aes(x = Survived, fill = Pclass) +
  geom_bar() +
  scale_fill_viridis_c(option = "magma", direction = 1) +
  labs(
    x = "Status Penumpang (0: Tidak Selamat, 1: Selamat)",
    y = "Jumlah Penumpang",
    title = "Status Penumpang Titanic",
    subtitle = "Berdasarkan Kelas Tumpangan (Kelas 1, 2, 3)",
    fill = "Kelas"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold",
                              hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  facet_wrap(vars(Pclass), scales = "free_x")

2. Regresi Logistik dengan Peubah Dummy

**2.1 Model Regresi Logistik

titanic.dummy <- glm(Survived~as.factor(sex)+Pclass1+Pclass2+Age, data = titanic, 
                     family = binomial(link = "logit"))
summary(titanic.dummy)
## 
## Call:
## glm(formula = Survived ~ as.factor(sex) + Pclass1 + Pclass2 + 
##     Age, family = binomial(link = "logit"), data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6470  -0.6683  -0.4189   0.6290   2.4272  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      1.219254   0.242457   5.029 4.94e-07 ***
## as.factor(sex)1 -2.603856   0.186876 -13.934  < 2e-16 ***
## Pclass11         2.320532   0.240896   9.633  < 2e-16 ***
## Pclass21         1.204652   0.224521   5.365 8.08e-08 ***
## Age             -0.033490   0.007381  -4.537 5.69e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1182.82  on 888  degrees of freedom
## Residual deviance:  804.71  on 884  degrees of freedom
## AIC: 814.71
## 
## Number of Fisher Scoring iterations: 5

2.2 Uji Simultan

nagelkerke(titanic.dummy) 
## $Models
##                                                                                                       
## Model: "glm, Survived ~ as.factor(sex) + Pclass1 + Pclass2 + Age, binomial(link = \"logit\"), titanic"
## Null:  "glm, Survived ~ 1, binomial(link = \"logit\"), titanic"                                       
## 
## $Pseudo.R.squared.for.model.vs.null
##                              Pseudo.R.squared
## McFadden                             0.319666
## Cox and Snell (ML)                   0.346437
## Nagelkerke (Cragg and Uhler)         0.470923
## 
## $Likelihood.ratio.test
##  Df.diff LogLik.diff  Chisq    p.value
##       -4     -189.05 378.11 1.4931e-80
## 
## $Number.of.observations
##           
## Model: 889
## Null:  889
## 
## $Messages
## [1] "Note: For models fit with REML, these statistics are based on refitting with ML"
## 
## $Warnings
## [1] "None"

2.3 Uji Parsial

Anova(titanic.dummy, type="II", test="Wald")
## Analysis of Deviance Table (Type II tests)
## 
## Response: Survived
##                Df   Chisq Pr(>Chisq)    
## as.factor(sex)  1 194.144  < 2.2e-16 ***
## Pclass1         1  92.793  < 2.2e-16 ***
## Pclass2         1  28.788  8.076e-08 ***
## Age             1  20.588  5.694e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.5 Dugaan Peluang Peubah Respon

phi.x.1 <- function(c1, c2, c3, x){
  intercept = -1.22 ; beta.c1 = -2.6 ; beta.c2 = 2.32; beta.c3 = 1.2; 
  beta1 = -0.03
  pers <- intercept + beta.c1*c1 + beta.c2*c2 + beta.c3*c3 + beta1*x
  phi.x <- exp(pers)/(1+exp(pers))
  return(list(nilai.log.odds=pers, dugaan.peluang=phi.x))
}
summary(titanic$Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   22.00   30.00   29.71   35.00   80.00
mc1 <- phi.x.1(1,1,0,30)
fc1 <- phi.x.1(0,1,0,30)
mc2 <- phi.x.1(1,0,1,30)
fc2 <- phi.x.1(0,0,1,30)
mc3 <- phi.x.1(1,0,0,30)
fc3 <- phi.x.1(0,0,0,30)

gol <- c("Male-Class 1", "Female-Class 1", "Male-Class 2", "Female-Class 2",
         "Male-Class 3", "Female-Class 3")
usia <- c(rep("30 tahun", 6))
logit1 <- c(mc1$nilai.log.odds, fc1$nilai.log.odds, mc2$nilai.log.odds, 
          fc2$nilai.log.odds, mc3$nilai.log.odds, fc3$nilai.log.odds)
prob <- c(mc1$dugaan.peluang, fc1$dugaan.peluang, mc2$dugaan.peluang, 
          fc2$dugaan.peluang, mc3$dugaan.peluang, fc3$dugaan.peluang)
dugaan1 <- data.frame(cbind(gol, usia, logit1, round(prob,2)))
colnames(dugaan1) <- c("Golongan", "Usia", "logit", "Dugaan Peluang")
dugaan1$`Dugaan Selamat` <- ifelse(dugaan1$`Dugaan Peluang` >= 0.5, "Selamat", 
                                   "Tidak Selamat")
kableExtra::kable(dugaan1, align = rep("c",3), 
                  caption = "Pendugaan Peluang Selamat dari Tragedi Titanic")
Pendugaan Peluang Selamat dari Tragedi Titanic
Golongan Usia logit Dugaan Peluang Dugaan Selamat
Male-Class 1 30 tahun -2.4 0.08 Tidak Selamat
Female-Class 1 30 tahun 0.2 0.55 Selamat
Male-Class 2 30 tahun -3.52 0.03 Tidak Selamat
Female-Class 2 30 tahun -0.92 0.28 Tidak Selamat
Male-Class 3 30 tahun -4.72 0.01 Tidak Selamat
Female-Class 3 30 tahun -2.12 0.11 Tidak Selamat

2.6 Odds Ratio Antarlevel Peubah

male <- phi.x.1(1,0,0,0)
female <- phi.x.1(0,0,0,0)
sel_sex <- male$nilai.log.odds-female$nilai.log.odds
or_sex <- exp(sel_sex)

c1 <- phi.x.1(0,1,0,0)
c3 <- phi.x.1(0,0,0,0)
sel_c1c3 <- c1$nilai.log.odds-c3$nilai.log.odds
or_c1c3 <- exp(sel_c1c3)

c2 <- phi.x.1(0,0,1,0)
sel_c1c2 <- c1$nilai.log.odds-c2$nilai.log.odds
or_c1c2 <- exp(sel_c1c2)

sel_c2c3 <- c2$nilai.log.odds-c3$nilai.log.odds
or_c2c3 <- exp(sel_c2c3)

rasio <- c("Male to Female", "Class 1 to Class 3", "Class 1 to Class 2", 
           "Class 2 to Class 3")
int.model1 <- c(male$nilai.log.odds, c1$nilai.log.odds, c1$nilai.log.odds, 
                c2$nilai.log.odds)
int.comp1 <- c(female$nilai.log.odds, c3$nilai.log.odds, c2$nilai.log.odds, 
               c3$nilai.log.odds)
sel_int <- c(sel_sex, sel_c1c3, sel_c1c2, sel_c2c3)
or <- c(or_sex, or_c1c3, or_c1c2, or_c2c3)
odds_rasio1 <- cbind(rasio, int.model1, int.comp1, sel_int, round(or,2))
colnames(odds_rasio1) <- c("Perbandingan", "Intersep Model", 
                           "Intersep Pembanding", "Selisih Intersep", "Odds Ratio")
kableExtra::kable(odds_rasio1, align = rep("c",3))
Perbandingan Intersep Model Intersep Pembanding Selisih Intersep Odds Ratio
Male to Female -3.82 -1.22 -2.6 0.07
Class 1 to Class 3 1.1 -1.22 2.32 10.18
Class 1 to Class 2 1.1 -0.02 1.12 3.06
Class 2 to Class 3 -0.02 -1.22 1.2 3.32

3. Regresi Logistik Tanpa Peubah Dummy

3.1 Model Regresi Logistik

titanic.nondummy <- glm(Survived~sex+Pclass+Age, data = titanic, 
                       family = binomial(link = "logit"))
summary(titanic.nondummy)
## 
## Call:
## glm(formula = Survived ~ sex + Pclass + Age, family = binomial(link = "logit"), 
##     data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6542  -0.6581  -0.4203   0.6383   2.4252  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.723927   0.449753  10.503  < 2e-16 ***
## sex         -2.604618   0.186767 -13.946  < 2e-16 ***
## Pclass      -1.164375   0.118905  -9.792  < 2e-16 ***
## Age         -0.033618   0.007359  -4.568 4.91e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1182.82  on 888  degrees of freedom
## Residual deviance:  804.76  on 885  degrees of freedom
## AIC: 812.76
## 
## Number of Fisher Scoring iterations: 5

3.2 Uji Simultan

nagelkerke(titanic.nondummy) 
## $Models
##                                                                                 
## Model: "glm, Survived ~ sex + Pclass + Age, binomial(link = \"logit\"), titanic"
## Null:  "glm, Survived ~ 1, binomial(link = \"logit\"), titanic"                 
## 
## $Pseudo.R.squared.for.model.vs.null
##                              Pseudo.R.squared
## McFadden                             0.319628
## Cox and Snell (ML)                   0.346404
## Nagelkerke (Cragg and Uhler)         0.470878
## 
## $Likelihood.ratio.test
##  Df.diff LogLik.diff  Chisq    p.value
##       -3     -189.03 378.06 1.2497e-81
## 
## $Number.of.observations
##           
## Model: 889
## Null:  889
## 
## $Messages
## [1] "Note: For models fit with REML, these statistics are based on refitting with ML"
## 
## $Warnings
## [1] "None"

3.3 Uji Parsial

Anova(titanic.nondummy, type="II", test="Wald")
## Analysis of Deviance Table (Type II tests)
## 
## Response: Survived
##        Df   Chisq Pr(>Chisq)    
## sex     1 194.486  < 2.2e-16 ***
## Pclass  1  95.892  < 2.2e-16 ***
## Age     1  20.871  4.912e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.4 Dugaan Peluang Peubah Respon

phi.x.2 <- function(x1, x2, x3){
  intercept = 4.72 ; b1 = -2.6 ; b2 = -1.16; b3 = -0.03
  pers <- intercept + b1*x1 + b2*x2 + b3*x3
  phi.x <- exp(pers)/(1+exp(pers))
  return(list(nilai.log.odds=pers, dugaan.peluang=phi.x))
}
m1 <- phi.x.2(1,1,30)
f1 <- phi.x.2(0,1,30)
m2 <- phi.x.2(1,2,30)
f2 <- phi.x.2(0,2,30)
m3 <- phi.x.2(1,3,30)
f3 <- phi.x.2(0,3,30)

gol2 <- c("Male-Class 1", "Female-Class 1", "Male-Class 2", "Female-Class 2",
         "Male-Class 3", "Female-Class 3")
usia2 <- c(rep("30 tahun", 6))
logit2 <- c(m1$nilai.log.odds, f1$nilai.log.odds, m2$nilai.log.odds, 
            f2$nilai.log.odds, m3$nilai.log.odds, f3$nilai.log.odds)
prob2 <- c(m1$dugaan.peluang, f1$dugaan.peluang, m2$dugaan.peluang, 
          f2$dugaan.peluang, m3$dugaan.peluang, f3$dugaan.peluang)
dugaan2 <- data.frame(cbind(gol2, usia2, round(logit2,2), round(prob2,2)))
colnames(dugaan2) <- c("Golongan", "Usia", "logit", "Dugaan Peluang")
dugaan2$`Dugaan Selamat` <- ifelse(dugaan2$`Dugaan Peluang` >= 0.5, "Selamat", 
                                   "Tidak Selamat")
kableExtra::kable(dugaan2, align = rep("c",4))
Golongan Usia logit Dugaan Peluang Dugaan Selamat
Male-Class 1 30 tahun 0.06 0.51 Selamat
Female-Class 1 30 tahun 2.66 0.93 Selamat
Male-Class 2 30 tahun -1.1 0.25 Tidak Selamat
Female-Class 2 30 tahun 1.5 0.82 Selamat
Male-Class 3 30 tahun -2.26 0.09 Tidak Selamat
Female-Class 3 30 tahun 0.34 0.58 Selamat

**3.5 Odds Ratio Antarlevel Peubah

male2 <- phi.x.2(1,0,0)
female2 <- phi.x.2(0,0,0)
sel_sex2 <- male2$nilai.log.odds-female2$nilai.log.odds
or_sex2 <- exp(sel_sex2)

c1.2 <- phi.x.2(0,1,0)
c3.2 <- phi.x.2(0,3,0)
sel_c1c3.2 <- c1.2$nilai.log.odds-c3.2$nilai.log.odds
or_c1c3.2 <- exp(sel_c1c3.2)

c2.2 <- phi.x.2(0,2,0)
sel_c1c2.2 <- c1.2$nilai.log.odds-c2.2$nilai.log.odds
or_c1c2.2 <- exp(sel_c1c2.2)

sel_c2c3.2 <- c2.2$nilai.log.odds-c3.2$nilai.log.odds
or_c2c3.2 <- exp(sel_c2c3.2)

rasio2 <- c("Male to Female", "Class 1 to Class 3", "Class 1 to Class 2", 
            "Class 2 to Class 3")
int.model2 <- c(male2$nilai.log.odds, c1.2$nilai.log.odds, c1.2$nilai.log.odds, 
                c2.2$nilai.log.odds)
int.comp2 <- c(female2$nilai.log.odds, c3.2$nilai.log.odds, c2.2$nilai.log.odds, 
               c3.2$nilai.log.odds)
sel_int2 <- c(sel_sex2, sel_c1c3.2, sel_c1c2.2, sel_c2c3.2)
or2 <- c(or_sex2, or_c1c3.2, or_c1c2.2, or_c2c3.2)
odds_rasio2 <- cbind(rasio2, int.model2, int.comp2, sel_int2, round(or2,2))
colnames(odds_rasio2) <- c("Perbandingan", "Intersep Model", 
                           "Intersep Pembanding", "Selisih Intersep", "Odds Ratio")
kableExtra::kable(odds_rasio2, align = rep("c",5))
Perbandingan Intersep Model Intersep Pembanding Selisih Intersep Odds Ratio
Male to Female 2.12 4.72 -2.6 0.07
Class 1 to Class 3 3.56 1.24 2.32 10.18
Class 1 to Class 2 3.56 2.4 1.16 3.19
Class 2 to Class 3 2.4 1.24 1.16 3.19