In this assignment, I will be applying the Zelig 5 syntax to recreate the computations described on pages 5.2-5.18 of the lecture slides. The objective is to estimate the probability of survival on the titanic by age, fare, sex, and class.
library(Zelig)
library(texreg)
library(car)
library(mvtnorm)
library(ggplot2)
library(radiant.data)
head(titanic_z)
z5titanic<- zlogit$new()
z5titanic$zelig(survival ~ age + sex*pclass + fare, data = titanic_z)
summary(z5titanic)
Model:
Call:
z5titanic$zelig(formula = survival ~ age + sex * pclass + fare,
data = titanic_z)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.0628 -0.6636 -0.4941 0.4337 2.4940
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.8959649 0.6128145 7.989 0.00000000000000136
age -0.0386781 0.0067926 -5.694 0.00000001239977237
sexmale -3.9001038 0.5015680 -7.776 0.00000000000000750
pclass2nd -1.5922712 0.6024689 -2.643 0.00822
pclass3rd -4.1381922 0.5601346 -7.388 0.00000000000014922
fare -0.0009074 0.0020510 -0.442 0.65820
sexmale:pclass2nd -0.0603255 0.6373047 -0.095 0.92459
sexmale:pclass3rd 2.5016703 0.5479814 4.565 0.00000498908018340
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1409.99 on 1042 degrees of freedom
Residual deviance: 931.42 on 1035 degrees of freedom
AIC: 947.42
Number of Fisher Scoring iterations: 5
Next step: Use 'setx' method
z5titanic$setrange(age = min(titanic_z$age):max(titanic_z$age))
z5titanic$sim()
ci.plot(z5titanic)
z5titanic$setrange(fare = min(titanic_z$fare):max(titanic_z$fare))
z5titanic$sim()
ci.plot(z5titanic)
z5sexdiff <- zlogit$new()
z5sexdiff$zelig(survival ~ age + sex*pclass + fare, data = titanic_z)
z5sexdiff$setx(sex = "male")
z5sexdiff$setx1(sex = "female")
z5sexdiff$sim()
summary(z5sexdiff)
sim x :
-----
ev
mean sd 50% 2.5% 97.5%
[1,] 0.1406442 0.01941659 0.1391434 0.1063094 0.1815272
pv
0 1
[1,] 0.85 0.15
sim x1 :
-----
ev
mean sd 50% 2.5% 97.5%
[1,] 0.3952872 0.04363755 0.3931868 0.3109656 0.4854279
pv
0 1
[1,] 0.587 0.413
fd
mean sd 50% 2.5% 97.5%
[1,] 0.2546429 0.0451618 0.251758 0.1656266 0.3415504
plot(z5sexdiff)
1st class
z5_1class <- zlogit$new()
z5_1class$zelig(survival ~ age + sex*pclass + fare, data = titanic_z)
z5_1class$setx(sex = "male", pclass = "1st")
z5_1class$setx1(sex = "female", pclass = "1st")
z5_1class$sim()
plot(z5_1class)
2nd class
z5_2class <- zlogit$new()
z5_2class$zelig(survival ~ age + sex*pclass + fare, data = titanic_z)
z5_2class$setx(sex = "male", pclass = "2nd")
z5_2class$setx1(sex = "female", pclass = "2nd")
z5_2class$sim()
plot(z5_2class)
3rd class
z5_3class <- zlogit$new()
z5_3class$zelig(survival ~ age + sex*pclass + fare, data = titanic_z)
z5_3class$setx(sex = "male", pclass = "3rd")
z5_3class$setx1(sex = "female", pclass = "3rd")
z5_3class$sim()
plot(z5_3class)
Consolidating into one table
d1 <- z5_1class$get_qi(xvalue="x1", qi="fd")
d2 <- z5_2class$get_qi(xvalue="x1", qi="fd")
d3 <- z5_3class$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
library(tidyr)
tidd <- dfd %>%
gather(class, simv, 1:3)
head(tidd)
tidd %>%
group_by(class) %>%
summarise(mean = mean(simv), sd = sd(simv))
Plotting class variations in sex difference
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)