Number of cases in table: 2201
Number of factors: 4
Test for independence of all factors:
Chisq = 1637.4, df = 25, p-value = 0
Chi-squared approximation may be incorrect
# Select relevant variables and clean the dataclean_df <- titanic %>% dplyr::select(Survived, Pclass, Sex, Age, SibSp, Parch, Fare, Embarked) %>%drop_na() %>%mutate(Survived =as.factor(Survived),Pclass =as.factor(Pclass),Sex =as.factor(Sex),Embarked =as.factor(Embarked) )# Verify the structure of the cleaned data framestr(clean_df)
# Run the "kitchen sink" model with all potential predictorskitchen_sink_model <-glm(Survived ~ Pclass + Sex + Age + SibSp + Parch + Fare + Embarked,data = clean_df,family ="binomial")summary(kitchen_sink_model)
Call:
glm(formula = Survived ~ Pclass + Sex + Age + SibSp + Parch +
Fare + Embarked, family = "binomial", data = clean_df)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 16.691979 607.920015 0.027 0.978095
Pclass2 -1.189637 0.329197 -3.614 0.000302 ***
Pclass3 -2.395220 0.343356 -6.976 3.04e-12 ***
Sexmale -2.637859 0.223006 -11.829 < 2e-16 ***
Age -0.043308 0.008322 -5.204 1.95e-07 ***
SibSp -0.362925 0.129290 -2.807 0.005000 **
Parch -0.060365 0.123944 -0.487 0.626233
Fare 0.001451 0.002595 0.559 0.576143
EmbarkedC -12.259048 607.919885 -0.020 0.983911
EmbarkedQ -13.082427 607.920088 -0.022 0.982831
EmbarkedS -12.661895 607.919868 -0.021 0.983383
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 964.52 on 713 degrees of freedom
Residual deviance: 632.34 on 703 degrees of freedom
AIC: 654.34
Number of Fisher Scoring iterations: 13
# Perform backward selection using stepAICfinal_model <-stepAIC(kitchen_sink_model, direction ="backward")
Start: AIC=654.34
Survived ~ Pclass + Sex + Age + SibSp + Parch + Fare + Embarked
Df Deviance AIC
- Embarked 3 635.78 651.78
- Parch 1 632.58 652.58
- Fare 1 632.67 652.67
<none> 632.34 654.34
- SibSp 1 640.85 660.85
- Age 1 662.15 682.15
- Pclass 2 686.64 704.64
- Sex 1 806.80 826.80
Step: AIC=651.78
Survived ~ Pclass + Sex + Age + SibSp + Parch + Fare
Df Deviance AIC
- Parch 1 636.03 650.03
- Fare 1 636.46 650.46
<none> 635.78 651.78
- SibSp 1 645.25 659.25
- Age 1 667.36 681.36
- Pclass 2 695.26 707.26
- Sex 1 814.49 828.49
Step: AIC=650.03
Survived ~ Pclass + Sex + Age + SibSp + Fare
Df Deviance AIC
- Fare 1 636.56 648.56
<none> 636.03 650.03
- SibSp 1 647.23 659.23
- Age 1 667.61 679.61
- Pclass 2 699.21 709.21
- Sex 1 819.08 831.08
Step: AIC=648.56
Survived ~ Pclass + Sex + Age + SibSp
Df Deviance AIC
<none> 636.56 648.56
- SibSp 1 647.28 657.28
- Age 1 669.40 679.40
- Pclass 2 742.29 750.29
- Sex 1 823.72 833.72
# View the summary of the final modelsummary(final_model)
Call:
glm(formula = Survived ~ Pclass + Sex + Age + SibSp, family = "binomial",
data = clean_df)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.334201 0.450700 9.617 < 2e-16 ***
Pclass2 -1.414360 0.284727 -4.967 6.78e-07 ***
Pclass3 -2.652618 0.285832 -9.280 < 2e-16 ***
Sexmale -2.627679 0.214771 -12.235 < 2e-16 ***
Age -0.044760 0.008225 -5.442 5.27e-08 ***
SibSp -0.380190 0.121516 -3.129 0.00176 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 964.52 on 713 degrees of freedom
Residual deviance: 636.56 on 708 degrees of freedom
AIC: 648.56
Number of Fisher Scoring iterations: 5