library(Zelig)
library(tidyr)
library(dplyr)
library(ggplot2)
library(mvtnorm)
library(sjmisc)
These are the packages used for this week’s assignment.
library(radiant.data)
data(titanic)
head(titanic)
I loaded the “titanic” data.
titanic=titanic %>%
mutate(surv = as.integer(survived)) %>%
mutate(survival = sjmisc::rec(surv, rec = "2=0; 1=1")) %>%
select(pclass, survived, survival, everything())
I recoded for logit.
z5=zlogit$new()
z5$zelig(survival ~ age + sex * pclass + fare, data = titanic)
summary(z5)
## Model:
##
## Call:
## z5$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0634 -0.6641 -0.4943 0.4336 2.4941
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.8978215 0.6131092 7.988 1.37e-15
## age -0.0387245 0.0068044 -5.691 1.26e-08
## sexmale -3.8996177 0.5015659 -7.775 7.55e-15
## pclass2nd -1.5923247 0.6024844 -2.643 0.00822
## pclass3rd -4.1382715 0.5601819 -7.387 1.50e-13
## fare -0.0009058 0.0020509 -0.442 0.65874
## sexmale:pclass2nd -0.0600076 0.6372949 -0.094 0.92498
## sexmale:pclass3rd 2.5019110 0.5479901 4.566 4.98e-06
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1409.99 on 1042 degrees of freedom
## Residual deviance: 931.45 on 1035 degrees of freedom
## AIC: 947.45
##
## Number of Fisher Scoring iterations: 5
##
## Next step: Use 'setx' method
I ran logit with Zelig.
a.range = min(titanic$age):max(titanic$age)
z5$setrange(age=a.range)
z5$sim()
z5$graph()
This simulation shows the expected values for age, with all other features set to their default values. The survival value in the Titanic decreases as age increases.
f.range = min(titanic$fare):max(titanic$fare)
z5$setrange(fare=f.range)
z5$sim()
z5$graph()
This simulation shows the expected values for fare, with all other features set to their default values. The survival value in the Titanic appears to remain constant (for the most part).
z5=zlogit$new()
z5$zelig(survival ~ age + sex * pclass + fare, data = titanic)
z5$setx(sex="male")
z5$setx1(sex="female")
z5$sim()
summary(z5)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.1396913 0.01971671 0.1385753 0.1037234 0.1798837
## pv
## 0 1
## [1,] 0.869 0.131
##
## sim x1 :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.3976795 0.04283382 0.3984457 0.3114398 0.4798943
## pv
## 0 1
## [1,] 0.588 0.412
## fd
## mean sd 50% 2.5% 97.5%
## [1,] 0.2579882 0.04337139 0.2579938 0.1714072 0.3437799
fd <- z5$get_qi(xvalue="x1", qi="fd")
summary(fd)
## V1
## Min. :0.1264
## 1st Qu.:0.2286
## Median :0.2580
## Mean :0.2580
## 3rd Qu.:0.2875
## Max. :0.3951
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar= c(1,1,1,1))
z5$graph()
This simulation shows the expected values for sex difference, with all other features set to their default values. The survival value in the Titanic is greater for females than males (difference is about .25)
z5_1c=zlogit$new()
z5_1c$zelig(survival ~ age + sex * pclass + fare, data = titanic)
z5_1c$setx(sex="male",pclass="1st")
z5_1c$setx1(sex="female",pclass="1st")
z5_1c$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar= c(1,1,1,1))
z5_1c$graph()
The survival value for first class appears is greater for females than males (difference is about .5-.55) Females appear to have nearly 100% expected surivial value.
z5_2c=zlogit$new()
z5_2c$zelig(survival ~ age + sex * pclass + fare, data = titanic)
z5_2c$setx(sex="male",pclass="2nd")
z5_2c$setx1(sex="female",pclass="2nd")
z5_2c$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar= c(1,1,1,1))
z5_2c$graph()
The survival value for second class appears is again greater for females than males (difference is about .75-.80). Both males and females survival value decreased compared to first class.
z5_3c=zlogit$new()
z5_3c$zelig(survival ~ age + sex * pclass + fare, data = titanic)
z5_3c$setx(sex="male",pclass="3rd")
z5_3c$setx1(sex="female",pclass="3rd")
z5_3c$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar= c(1,1,1,1))
z5_3c$graph()
The survival value for third class appears is again greater for females than males (difference is about .25). Both males and females survival value decreased compared to first class and second class.
d1 <- z5_1c$get_qi(xvalue="x1", qi="fd")
d2 <- z5_2c$get_qi(xvalue="x1", qi="fd")
d3 <- z5_3c$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
library(tidyr)
tidd <- dfd %>%
gather(class, simv, 1:3)
head(tidd)
library(dplyr)
tidd %>%
group_by(class) %>%
summarise(mean = mean(simv), sd = sd(simv))
library(ggplot2)
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)
This shows the simulated expected first difference betwene males and females across all three classes. The survival value is the lowest in third class (already explained above).