library(rlang)
library(rcompanion)
library(car)## Loading required package: carData
library(dplyr)##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)titanic <- data.frame(read.csv("C:/ICA/Semester 5/PADK/titanic_clean.csv"))
data <- head(titanic)
kableExtra::kable(data, align = rep('c',9))| X | Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked |
|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 3 | male | 22.00000 | 1 | 0 | 7.2500 | S |
| 2 | 1 | 1 | female | 38.00000 | 1 | 0 | 71.2833 | C |
| 3 | 1 | 3 | female | 26.00000 | 0 | 0 | 7.9250 | S |
| 4 | 1 | 1 | female | 35.00000 | 1 | 0 | 53.1000 | S |
| 5 | 0 | 3 | male | 35.00000 | 0 | 0 | 8.0500 | S |
| 6 | 0 | 3 | male | 29.69912 | 0 | 0 | 8.4583 | Q |
titanic$sex <- ifelse(titanic$Sex == "male", 1, 0)titanic$Pclass1 <- ifelse(titanic$Pclass == "1", 1, 0)
titanic$Pclass2 <- ifelse(titanic$Pclass == "2", 1, 0) titanic <- titanic[, c(2,3,5,10:12)]
titanic$Survived <- as.factor(titanic$Survived)
titanic$Pclass1 <- as.factor(titanic$Pclass1)
titanic$Pclass2 <- as.factor(titanic$Pclass2)
titanic$Age <- as.integer(round(titanic$Age, 0))
titanic$sex <- as.integer(titanic$sex)
str(titanic)## 'data.frame': 889 obs. of 6 variables:
## $ Survived: Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 1 2 2 ...
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Age : int 22 38 26 35 35 30 54 2 27 14 ...
## $ sex : int 1 0 0 0 1 1 1 1 0 0 ...
## $ Pclass1 : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 2 1 1 1 ...
## $ Pclass2 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
library(dplyr)
titanic$survived <- as.integer(titanic$Survived)
d <- titanic %>% count(survived)
pie(c(d[1,2],d[2,2]), col = c("#DC3535", "#6ECCAF"),
labels = c("Tidak Selamat", "Selamat"), border = "black",
main = "Status Penumpang Titanic")library(ggplot2)
ggplot(titanic) +
aes(x = Survived, fill = Pclass) +
geom_bar() +
scale_fill_viridis_c(option = "magma", direction = 1) +
labs(
x = "Status Penumpang (0: Tidak Selamat, 1: Selamat)",
y = "Jumlah Penumpang",
title = "Status Penumpang Titanic",
subtitle = "Berdasarkan Kelas Tumpangan (Kelas 1, 2, 3)",
fill = "Kelas"
) +
theme_bw() +
theme(
legend.position = "none",
plot.title = element_text(face = "bold",
hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)
) +
facet_wrap(vars(Pclass), scales = "free_x")titanic.dummy <- glm(Survived~as.factor(sex)+Pclass1+Pclass2+Age, data = titanic,
family = binomial(link = "logit"))
summary(titanic.dummy)##
## Call:
## glm(formula = Survived ~ as.factor(sex) + Pclass1 + Pclass2 +
## Age, family = binomial(link = "logit"), data = titanic)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6470 -0.6683 -0.4189 0.6290 2.4272
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.219254 0.242457 5.029 4.94e-07 ***
## as.factor(sex)1 -2.603856 0.186876 -13.934 < 2e-16 ***
## Pclass11 2.320532 0.240896 9.633 < 2e-16 ***
## Pclass21 1.204652 0.224521 5.365 8.08e-08 ***
## Age -0.033490 0.007381 -4.537 5.69e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1182.82 on 888 degrees of freedom
## Residual deviance: 804.71 on 884 degrees of freedom
## AIC: 814.71
##
## Number of Fisher Scoring iterations: 5
nagelkerke(titanic.dummy) ## $Models
##
## Model: "glm, Survived ~ as.factor(sex) + Pclass1 + Pclass2 + Age, binomial(link = \"logit\"), titanic"
## Null: "glm, Survived ~ 1, binomial(link = \"logit\"), titanic"
##
## $Pseudo.R.squared.for.model.vs.null
## Pseudo.R.squared
## McFadden 0.319666
## Cox and Snell (ML) 0.346437
## Nagelkerke (Cragg and Uhler) 0.470923
##
## $Likelihood.ratio.test
## Df.diff LogLik.diff Chisq p.value
## -4 -189.05 378.11 1.4931e-80
##
## $Number.of.observations
##
## Model: 889
## Null: 889
##
## $Messages
## [1] "Note: For models fit with REML, these statistics are based on refitting with ML"
##
## $Warnings
## [1] "None"
Anova(titanic.dummy, type="II", test="Wald")## Analysis of Deviance Table (Type II tests)
##
## Response: Survived
## Df Chisq Pr(>Chisq)
## as.factor(sex) 1 194.144 < 2.2e-16 ***
## Pclass1 1 92.793 < 2.2e-16 ***
## Pclass2 1 28.788 8.076e-08 ***
## Age 1 20.588 5.694e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
phi.x.1 <- function(c1, c2, c3, x){
intercept = -1.22 ; beta.c1 = -2.6 ; beta.c2 = 2.32; beta.c3 = 1.2;
beta1 = -0.03
pers <- intercept + beta.c1*c1 + beta.c2*c2 + beta.c3*c3 + beta1*x
phi.x <- exp(pers)/(1+exp(pers))
return(list(nilai.log.odds=pers, dugaan.peluang=phi.x))
}summary(titanic$Age)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 22.00 30.00 29.71 35.00 80.00
mc1 <- phi.x.1(1,1,0,30)
fc1 <- phi.x.1(0,1,0,30)
mc2 <- phi.x.1(1,0,1,30)
fc2 <- phi.x.1(0,0,1,30)
mc3 <- phi.x.1(1,0,0,30)
fc3 <- phi.x.1(0,0,0,30)
gol <- c("Male-Class 1", "Female-Class 1", "Male-Class 2", "Female-Class 2",
"Male-Class 3", "Female-Class 3")
usia <- c(rep("30 tahun", 6))
logit1 <- c(mc1$nilai.log.odds, fc1$nilai.log.odds, mc2$nilai.log.odds,
fc2$nilai.log.odds, mc3$nilai.log.odds, fc3$nilai.log.odds)
prob <- c(mc1$dugaan.peluang, fc1$dugaan.peluang, mc2$dugaan.peluang,
fc2$dugaan.peluang, mc3$dugaan.peluang, fc3$dugaan.peluang)
dugaan1 <- data.frame(cbind(gol, usia, logit1, round(prob,2)))
colnames(dugaan1) <- c("Golongan", "Usia", "logit", "Dugaan Peluang")
dugaan1$`Dugaan Selamat` <- ifelse(dugaan1$`Dugaan Peluang` >= 0.5, "Selamat",
"Tidak Selamat")
kableExtra::kable(dugaan1, align = rep("c",3),
caption = "Pendugaan Peluang Selamat dari Tragedi Titanic")| Golongan | Usia | logit | Dugaan Peluang | Dugaan Selamat |
|---|---|---|---|---|
| Male-Class 1 | 30 tahun | -2.4 | 0.08 | Tidak Selamat |
| Female-Class 1 | 30 tahun | 0.2 | 0.55 | Selamat |
| Male-Class 2 | 30 tahun | -3.52 | 0.03 | Tidak Selamat |
| Female-Class 2 | 30 tahun | -0.92 | 0.28 | Tidak Selamat |
| Male-Class 3 | 30 tahun | -4.72 | 0.01 | Tidak Selamat |
| Female-Class 3 | 30 tahun | -2.12 | 0.11 | Tidak Selamat |
male <- phi.x.1(1,0,0,0)
female <- phi.x.1(0,0,0,0)
sel_sex <- male$nilai.log.odds-female$nilai.log.odds
or_sex <- exp(sel_sex)
c1 <- phi.x.1(0,1,0,0)
c3 <- phi.x.1(0,0,0,0)
sel_c1c3 <- c1$nilai.log.odds-c3$nilai.log.odds
or_c1c3 <- exp(sel_c1c3)
c2 <- phi.x.1(0,0,1,0)
sel_c1c2 <- c1$nilai.log.odds-c2$nilai.log.odds
or_c1c2 <- exp(sel_c1c2)
sel_c2c3 <- c2$nilai.log.odds-c3$nilai.log.odds
or_c2c3 <- exp(sel_c2c3)
rasio <- c("Male to Female", "Class 1 to Class 3", "Class 1 to Class 2",
"Class 2 to Class 3")
int.model1 <- c(male$nilai.log.odds, c1$nilai.log.odds, c1$nilai.log.odds,
c2$nilai.log.odds)
int.comp1 <- c(female$nilai.log.odds, c3$nilai.log.odds, c2$nilai.log.odds,
c3$nilai.log.odds)
sel_int <- c(sel_sex, sel_c1c3, sel_c1c2, sel_c2c3)
or <- c(or_sex, or_c1c3, or_c1c2, or_c2c3)
odds_rasio1 <- cbind(rasio, int.model1, int.comp1, sel_int, round(or,2))
colnames(odds_rasio1) <- c("Perbandingan", "Intersep Model",
"Intersep Pembanding", "Selisih Intersep", "Odds Ratio")
kableExtra::kable(odds_rasio1, align = rep("c",3))| Perbandingan | Intersep Model | Intersep Pembanding | Selisih Intersep | Odds Ratio |
|---|---|---|---|---|
| Male to Female | -3.82 | -1.22 | -2.6 | 0.07 |
| Class 1 to Class 3 | 1.1 | -1.22 | 2.32 | 10.18 |
| Class 1 to Class 2 | 1.1 | -0.02 | 1.12 | 3.06 |
| Class 2 to Class 3 | -0.02 | -1.22 | 1.2 | 3.32 |
titanic.nondummy <- glm(Survived~sex+Pclass+Age, data = titanic,
family = binomial(link = "logit"))
summary(titanic.nondummy)##
## Call:
## glm(formula = Survived ~ sex + Pclass + Age, family = binomial(link = "logit"),
## data = titanic)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6542 -0.6581 -0.4203 0.6383 2.4252
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.723927 0.449753 10.503 < 2e-16 ***
## sex -2.604618 0.186767 -13.946 < 2e-16 ***
## Pclass -1.164375 0.118905 -9.792 < 2e-16 ***
## Age -0.033618 0.007359 -4.568 4.91e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1182.82 on 888 degrees of freedom
## Residual deviance: 804.76 on 885 degrees of freedom
## AIC: 812.76
##
## Number of Fisher Scoring iterations: 5
nagelkerke(titanic.nondummy) ## $Models
##
## Model: "glm, Survived ~ sex + Pclass + Age, binomial(link = \"logit\"), titanic"
## Null: "glm, Survived ~ 1, binomial(link = \"logit\"), titanic"
##
## $Pseudo.R.squared.for.model.vs.null
## Pseudo.R.squared
## McFadden 0.319628
## Cox and Snell (ML) 0.346404
## Nagelkerke (Cragg and Uhler) 0.470878
##
## $Likelihood.ratio.test
## Df.diff LogLik.diff Chisq p.value
## -3 -189.03 378.06 1.2497e-81
##
## $Number.of.observations
##
## Model: 889
## Null: 889
##
## $Messages
## [1] "Note: For models fit with REML, these statistics are based on refitting with ML"
##
## $Warnings
## [1] "None"
Anova(titanic.nondummy, type="II", test="Wald")## Analysis of Deviance Table (Type II tests)
##
## Response: Survived
## Df Chisq Pr(>Chisq)
## sex 1 194.486 < 2.2e-16 ***
## Pclass 1 95.892 < 2.2e-16 ***
## Age 1 20.871 4.912e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
phi.x.2 <- function(x1, x2, x3){
intercept = 4.72 ; b1 = -2.6 ; b2 = -1.16; b3 = -0.03
pers <- intercept + b1*x1 + b2*x2 + b3*x3
phi.x <- exp(pers)/(1+exp(pers))
return(list(nilai.log.odds=pers, dugaan.peluang=phi.x))
}m1 <- phi.x.2(1,1,30)
f1 <- phi.x.2(0,1,30)
m2 <- phi.x.2(1,2,30)
f2 <- phi.x.2(0,2,30)
m3 <- phi.x.2(1,3,30)
f3 <- phi.x.2(0,3,30)
gol2 <- c("Male-Class 1", "Female-Class 1", "Male-Class 2", "Female-Class 2",
"Male-Class 3", "Female-Class 3")
usia2 <- c(rep("30 tahun", 6))
logit2 <- c(m1$nilai.log.odds, f1$nilai.log.odds, m2$nilai.log.odds,
f2$nilai.log.odds, m3$nilai.log.odds, f3$nilai.log.odds)
prob2 <- c(m1$dugaan.peluang, f1$dugaan.peluang, m2$dugaan.peluang,
f2$dugaan.peluang, m3$dugaan.peluang, f3$dugaan.peluang)
dugaan2 <- data.frame(cbind(gol2, usia2, round(logit2,2), round(prob2,2)))
colnames(dugaan2) <- c("Golongan", "Usia", "logit", "Dugaan Peluang")
dugaan2$`Dugaan Selamat` <- ifelse(dugaan2$`Dugaan Peluang` >= 0.5, "Selamat",
"Tidak Selamat")
kableExtra::kable(dugaan2, align = rep("c",4))| Golongan | Usia | logit | Dugaan Peluang | Dugaan Selamat |
|---|---|---|---|---|
| Male-Class 1 | 30 tahun | 0.06 | 0.51 | Selamat |
| Female-Class 1 | 30 tahun | 2.66 | 0.93 | Selamat |
| Male-Class 2 | 30 tahun | -1.1 | 0.25 | Tidak Selamat |
| Female-Class 2 | 30 tahun | 1.5 | 0.82 | Selamat |
| Male-Class 3 | 30 tahun | -2.26 | 0.09 | Tidak Selamat |
| Female-Class 3 | 30 tahun | 0.34 | 0.58 | Selamat |
male2 <- phi.x.2(1,0,0)
female2 <- phi.x.2(0,0,0)
sel_sex2 <- male2$nilai.log.odds-female2$nilai.log.odds
or_sex2 <- exp(sel_sex2)
c1.2 <- phi.x.2(0,1,0)
c3.2 <- phi.x.2(0,3,0)
sel_c1c3.2 <- c1.2$nilai.log.odds-c3.2$nilai.log.odds
or_c1c3.2 <- exp(sel_c1c3.2)
c2.2 <- phi.x.2(0,2,0)
sel_c1c2.2 <- c1.2$nilai.log.odds-c2.2$nilai.log.odds
or_c1c2.2 <- exp(sel_c1c2.2)
sel_c2c3.2 <- c2.2$nilai.log.odds-c3.2$nilai.log.odds
or_c2c3.2 <- exp(sel_c2c3.2)
rasio2 <- c("Male to Female", "Class 1 to Class 3", "Class 1 to Class 2",
"Class 2 to Class 3")
int.model2 <- c(male2$nilai.log.odds, c1.2$nilai.log.odds, c1.2$nilai.log.odds,
c2.2$nilai.log.odds)
int.comp2 <- c(female2$nilai.log.odds, c3.2$nilai.log.odds, c2.2$nilai.log.odds,
c3.2$nilai.log.odds)
sel_int2 <- c(sel_sex2, sel_c1c3.2, sel_c1c2.2, sel_c2c3.2)
or2 <- c(or_sex2, or_c1c3.2, or_c1c2.2, or_c2c3.2)
odds_rasio2 <- cbind(rasio2, int.model2, int.comp2, sel_int2, round(or2,2))
colnames(odds_rasio2) <- c("Perbandingan", "Intersep Model",
"Intersep Pembanding", "Selisih Intersep", "Odds Ratio")
kableExtra::kable(odds_rasio2, align = rep("c",5))| Perbandingan | Intersep Model | Intersep Pembanding | Selisih Intersep | Odds Ratio |
|---|---|---|---|---|
| Male to Female | 2.12 | 4.72 | -2.6 | 0.07 |
| Class 1 to Class 3 | 3.56 | 1.24 | 2.32 | 10.18 |
| Class 1 to Class 2 | 3.56 | 2.4 | 1.16 | 3.19 |
| Class 2 to Class 3 | 2.4 | 1.24 | 1.16 | 3.19 |