Using the Zelig 5 Package.

In the below exercises using the class powerpoint examples, we recode the zelig 4 codes to zelig 5 codes.

library (Zelig)
library (texreg)
library (car)
library (mvtnorm)

library(radiant.data)
data(titanic)
head(titanic)

Creating a Binary variable for Survived.

titanic <- titanic %>%
  mutate(surv = as.integer(survived)) %>% 
  mutate(survival = sjmisc::rec(surv, rec = "2=0; 1=1")) %>% 
  select(pclass, survived, survival, everything())

Using Zelig 5 to run a Logit Model

# 5.2 - 5.4
z5 <- zlogit$new()
z5$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(z5)
## Model: 
## 
## Call:
## z5$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0634  -0.6641  -0.4943   0.4336   2.4941  
## 
## Coefficients:
##                     Estimate Std. Error z value            Pr(>|z|)
## (Intercept)        4.8978215  0.6131092   7.988 0.00000000000000137
## age               -0.0387245  0.0068044  -5.691 0.00000001262563680
## sexmale           -3.8996177  0.5015659  -7.775 0.00000000000000755
## pclass2nd         -1.5923247  0.6024844  -2.643             0.00822
## pclass3rd         -4.1382715  0.5601819  -7.387 0.00000000000014976
## fare              -0.0009058  0.0020509  -0.442             0.65874
## sexmale:pclass2nd -0.0600076  0.6372949  -0.094             0.92498
## sexmale:pclass3rd  2.5019110  0.5479901   4.566 0.00000498035051247
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1409.99  on 1042  degrees of freedom
## Residual deviance:  931.45  on 1035  degrees of freedom
## AIC: 947.45
## 
## Number of Fisher Scoring iterations: 5
## 
## Next step: Use 'setx' method
# Simulation for Age
z5$setrange(age=min(titanic$age):max(titanic$age))
z5$sim()
z5$graph()

Simulation for Fare (Continious Variable)

# 5.5 - 5.6
z5$setrange(fare=min(titanic$fare):max(titanic$fare))
z5$sim()
z5$graph()

Simulation for Sex (Categorical Variable) Effect

#5.7
z5.1 <- zlogit$new()
z5.1$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.1$setx(sex = "male")
z5.1$setx1(sex = "female")
z5.1$sim()
summary(z5.1)
## 
##  sim x :
##  -----
## ev
##           mean         sd      50%      2.5%     97.5%
## [1,] 0.1406087 0.01939392 0.139718 0.1046529 0.1782703
## pv
##          0     1
## [1,] 0.841 0.159
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.3947618 0.04418977 0.3946738 0.3107491 0.4874459
## pv
##          0     1
## [1,] 0.599 0.401
## fd
##           mean         sd       50%     2.5%     97.5%
## [1,] 0.2541532 0.04432701 0.2536194 0.170042 0.3410147

First Difference (Sex Difference for Surviving)

#5.8
fd <- z5.1$get_qi(xvalue="x1", qi="fd")
summary(fd)
##        V1        
##  Min.   :0.1031  
##  1st Qu.:0.2231  
##  Median :0.2536  
##  Mean   :0.2542  
##  3rd Qu.:0.2830  
##  Max.   :0.3865
plot(z5.1)

Class Difference by Gender in Surviving

#5.11
# First Class
z5.2 <- zlogit$new()
z5.2$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.2$setx(sex = "male", pclass = "1st")
z5.2$setx1(sex = "female", pclass = "1st")
z5.2$sim()
summary(z5.2)
## 
##  sim x :
##  -----
## ev
##           mean        sd      50%      2.5%     97.5%
## [1,] 0.4530173 0.0498293 0.454348 0.3539429 0.5533023
## pv
##          0     1
## [1,] 0.535 0.465
## 
##  sim x1 :
##  -----
## ev
##           mean        sd       50%      2.5%     97.5%
## [1,] 0.9723726 0.0149092 0.9759734 0.9358932 0.9904827
## pv
##          0     1
## [1,] 0.023 0.977
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.5193552 0.04951689 0.5200743 0.4224792 0.6212149
plot(z5.2)

# Second Class
z5.3 <- zlogit$new()
z5.3$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.3$setx(sex = "male", pclass = "2nd")
z5.3$setx1(sex = "female", pclass = "2nd")
z5.3$sim()
summary(z5.3)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%       2.5%     97.5%
## [1,] 0.1401005 0.02850557 0.1375504 0.09146712 0.2040644
## pv
##          0     1
## [1,] 0.869 0.131
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.8901227 0.03154875 0.8929013 0.8169564 0.9403453
## pv
##          0     1
## [1,] 0.125 0.875
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.7500222 0.04273848 0.7522373 0.6549165 0.8273028
plot(z5.3)

# Third Class
z5.4 <- zlogit$new()
z5.4$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.4$setx(sex = "male", pclass = "3rd")
z5.4$setx1(sex = "female", pclass = "3rd")
z5.4$sim()
summary(z5.4)
## 
##  sim x :
##  -----
## ev
##           mean        sd      50%      2.5%     97.5%
## [1,] 0.1393472 0.0188383 0.137288 0.1066486 0.1817797
## pv
##          0     1
## [1,] 0.876 0.124
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.3933843 0.04337551 0.3937098 0.3070246 0.4795909
## pv
##          0     1
## [1,] 0.616 0.384
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.2540371 0.04341435 0.2546837 0.1699013 0.3424919
plot(z5.4)

Combing Results

d1<-z5.2$get_qi(xvalue="x1", qi="fd")
d2<-z5.3$get_qi(xvalue="x1", qi="fd")
d3<-z5.4$get_qi(xvalue="x1", qi="fd")

dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)

GGPLOTS and Table Difference

library(tidyr)

tidd <- dfd %>% 
  gather(class, simv, 1:3)
head(tidd)
library(dplyr)

tidd %>% 
  group_by(class) %>% 
  summarise(mean = mean(simv), sd = sd(simv))
library(ggplot2)

ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.