library(Zelig)
library(tidyr)
library(dplyr)
library(ggplot2)
library(mvtnorm)
library(sjmisc)

These are the packages used for this week’s assignment.

Zelig 5 Demo

library(radiant.data)
data(titanic)
head(titanic)

I loaded the “titanic” data.

titanic=titanic %>%
  mutate(surv = as.integer(survived)) %>% 
  mutate(survival = sjmisc::rec(surv, rec = "2=0; 1=1")) %>% 
  select(pclass, survived, survival, everything())

I recoded for logit.

z5=zlogit$new()
z5$zelig(survival ~ age + sex * pclass + fare, data = titanic)
summary(z5)
## Model: 
## 
## Call:
## z5$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0634  -0.6641  -0.4943   0.4336   2.4941  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)
## (Intercept)        4.8978215  0.6131092   7.988 1.37e-15
## age               -0.0387245  0.0068044  -5.691 1.26e-08
## sexmale           -3.8996177  0.5015659  -7.775 7.55e-15
## pclass2nd         -1.5923247  0.6024844  -2.643  0.00822
## pclass3rd         -4.1382715  0.5601819  -7.387 1.50e-13
## fare              -0.0009058  0.0020509  -0.442  0.65874
## sexmale:pclass2nd -0.0600076  0.6372949  -0.094  0.92498
## sexmale:pclass3rd  2.5019110  0.5479901   4.566 4.98e-06
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1409.99  on 1042  degrees of freedom
## Residual deviance:  931.45  on 1035  degrees of freedom
## AIC: 947.45
## 
## Number of Fisher Scoring iterations: 5
## 
## Next step: Use 'setx' method

I ran logit with Zelig.

Age Effect

a.range = min(titanic$age):max(titanic$age)
z5$setrange(age=a.range)
z5$sim()
z5$graph()

This simulation shows the expected values for age, with all other features set to their default values. The survival value in the Titanic decreases as age increases.

Fare Effect

f.range = min(titanic$fare):max(titanic$fare)
z5$setrange(fare=f.range)
z5$sim()
z5$graph()

This simulation shows the expected values for fare, with all other features set to their default values. The survival value in the Titanic appears to remain constant (for the most part).

Sex Difference

z5=zlogit$new()
z5$zelig(survival ~ age + sex * pclass + fare, data = titanic)
z5$setx(sex="male")
z5$setx1(sex="female")
z5$sim()
summary(z5)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.1396913 0.01971671 0.1385753 0.1037234 0.1798837
## pv
##          0     1
## [1,] 0.869 0.131
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.3976795 0.04283382 0.3984457 0.3114398 0.4798943
## pv
##          0     1
## [1,] 0.588 0.412
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.2579882 0.04337139 0.2579938 0.1714072 0.3437799

First Difference

fd <- z5$get_qi(xvalue="x1", qi="fd")
summary(fd)
##        V1        
##  Min.   :0.1264  
##  1st Qu.:0.2286  
##  Median :0.2580  
##  Mean   :0.2580  
##  3rd Qu.:0.2875  
##  Max.   :0.3951
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar= c(1,1,1,1))
z5$graph()

This simulation shows the expected values for sex difference, with all other features set to their default values. The survival value in the Titanic is greater for females than males (difference is about .25)

Class Varation in Sex Differences

First Class

z5_1c=zlogit$new()
z5_1c$zelig(survival ~ age + sex * pclass + fare, data = titanic)
z5_1c$setx(sex="male",pclass="1st")
z5_1c$setx1(sex="female",pclass="1st")
z5_1c$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar= c(1,1,1,1))
z5_1c$graph()

The survival value for first class appears is greater for females than males (difference is about .5-.55) Females appear to have nearly 100% expected surivial value.

Second Class

z5_2c=zlogit$new()
z5_2c$zelig(survival ~ age + sex * pclass + fare, data = titanic)
z5_2c$setx(sex="male",pclass="2nd")
z5_2c$setx1(sex="female",pclass="2nd")
z5_2c$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar= c(1,1,1,1))
z5_2c$graph()

The survival value for second class appears is again greater for females than males (difference is about .75-.80). Both males and females survival value decreased compared to first class.

Third Class

z5_3c=zlogit$new()
z5_3c$zelig(survival ~ age + sex * pclass + fare, data = titanic)
z5_3c$setx(sex="male",pclass="3rd")
z5_3c$setx1(sex="female",pclass="3rd")
z5_3c$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar= c(1,1,1,1))
z5_3c$graph()

The survival value for third class appears is again greater for females than males (difference is about .25). Both males and females survival value decreased compared to first class and second class.

Putting them in one place

d1 <- z5_1c$get_qi(xvalue="x1", qi="fd")
d2 <- z5_2c$get_qi(xvalue="x1", qi="fd")
d3 <- z5_3c$get_qi(xvalue="x1", qi="fd")

dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
library(tidyr)

tidd <- dfd %>% 
  gather(class, simv, 1:3)
head(tidd)
library(dplyr)

tidd %>% 
  group_by(class) %>% 
  summarise(mean = mean(simv), sd = sd(simv))
library(ggplot2)

ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)

This shows the simulated expected first difference betwene males and females across all three classes. The survival value is the lowest in third class (already explained above).