Loading Packages
library(tidyverse)
library(ggplot2)
library(dplyr)
library(sjmisc)
library(Zelig)
library(radiant.data)
The goal of this first assignment is to reimplement the code from slides 5.2-5.18 of lecture 6 using Zelig 5. In the lecture notes Zelig 4 was used, however for this assignment the code will be repeated but using zelig 5. Since simulations are involved, some of the numbers might vary.
titanic <- titanic %>%
mutate(surv = as.integer(survived)) %>%
mutate(survival = sjmisc::rec(surv, rec = "2=0; 1=1")) %>%
select(pclass, survived, survival, everything())
head(titanic)
summary(titanic)
pclass survived survival sex age sibsp
1st:282 Yes:425 Min. :0.0000 female:386 Min. : 0.1667 Min. :0.0000
2nd:261 No :618 1st Qu.:0.0000 male :657 1st Qu.:21.0000 1st Qu.:0.0000
3rd:500 Median :0.0000 Median :28.0000 Median :0.0000
Mean :0.4075 Mean :29.8132 Mean :0.5043
3rd Qu.:1.0000 3rd Qu.:39.0000 3rd Qu.:1.0000
Max. :1.0000 Max. :80.0000 Max. :8.0000
parch fare name cabin embarked
Min. :0.0000 Min. : 0.00 Length:1043 Length:1043 Cherbourg :212
1st Qu.:0.0000 1st Qu.: 8.05 Class :character Class :character Queenstown : 50
Median :0.0000 Median : 15.75 Mode :character Mode :character Southampton:781
Mean :0.4219 Mean : 36.60
3rd Qu.:1.0000 3rd Qu.: 35.08
Max. :6.0000 Max. :512.33
surv
Min. :1.000
1st Qu.:1.000
Median :2.000
Mean :1.593
3rd Qu.:2.000
Max. :2.000
z.out1 <- zlogit$new()
z.out1$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(z.out1)
Model:
Call:
z.out1$zelig(formula = survival ~ age + sex * pclass + fare,
data = titanic)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.0634 -0.6641 -0.4943 0.4336 2.4941
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.8978215 0.6131092 7.988 0.00000000000000137
age -0.0387245 0.0068044 -5.691 0.00000001262563680
sexmale -3.8996177 0.5015659 -7.775 0.00000000000000755
pclass2nd -1.5923247 0.6024844 -2.643 0.00822
pclass3rd -4.1382715 0.5601819 -7.387 0.00000000000014976
fare -0.0009058 0.0020509 -0.442 0.65874
sexmale:pclass2nd -0.0600076 0.6372949 -0.094 0.92498
sexmale:pclass3rd 2.5019110 0.5479901 4.566 0.00000498035051247
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1409.99 on 1042 degrees of freedom
Residual deviance: 931.45 on 1035 degrees of freedom
AIC: 947.45
Number of Fisher Scoring iterations: 5
Next step: Use 'setx' method
z.out1$setrange(age=0:80)
z.out1$sim()
z.out1$graph()
The maximum fare value was acquired from the statistics above.
z.out1$setrange(fare=0:513)
z.out1$sim()
z.out1$graph()
z.out1$setx(sex="male")
z.out1$setx1(sex="female")
z.out1$sim()
fd <- z.out1$get_qi(xvalue="x1", qi="fd")
summary(fd)
V1
Min. :0.1311
1st Qu.:0.2259
Median :0.2554
Mean :0.2549
3rd Qu.:0.2840
Max. :0.3846
plot(z.out1)
1st Class
z5.1st <- zlogit$new()
z5.1st$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.1st$setx(sex = "male", pclass = "1st")
z5.1st$setx1(sex = "female", pclass = "1st")
z5.1st$sim()
plot(z5.1st)
Second Class
z5.2nd <- zlogit$new()
z5.2nd$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.2nd$setx(sex = "male", pclass = "2nd")
z5.2nd$setx1(sex = "female", pclass = "2nd")
z5.2nd$sim()
plot(z5.2nd)
Third Class
z5.3rd <- zlogit$new()
z5.3rd$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.3rd$setx(sex = "male", pclass = "3rd")
z5.3rd$setx1(sex = "female", pclass = "3rd")
z5.3rd$sim()
plot(z5.3rd)
d1 <- z5.1st$get_qi(xvalue="x1", qi="fd")
d2 <- z5.2nd$get_qi(xvalue="x1", qi="fd")
d3 <- z5.3rd$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
tidd <- dfd %>%
gather(class, simv, 1:3)
head(tidd)
tidd %>%
group_by(class) %>%
summarise(mean = mean(simv), sd = sd(simv))
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)