library(Zelig)
## Warning: package 'Zelig' was built under R version 3.4.3
## Loading required package: survival
library(texreg)
## Warning: package 'texreg' was built under R version 3.4.3
## Version:  1.36.23
## Date:     2017-03-03
## Author:   Philip Leifeld (University of Glasgow)
## 
## Please cite the JSS article in your publications -- see citation("texreg").
library(car)
## Warning: package 'car' was built under R version 3.4.3
library(mvtnorm)
## Warning: package 'mvtnorm' was built under R version 3.4.3
library(radiant.data)
## Warning: package 'radiant.data' was built under R version 3.4.3
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 3.4.3
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:texreg':
## 
##     extract
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.3
## Loading required package: lubridate
## Warning: package 'lubridate' was built under R version 3.4.3
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
## Loading required package: tidyr
## Warning: package 'tidyr' was built under R version 3.4.3
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
## The following object is masked from 'package:texreg':
## 
##     extract
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 3.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'radiant.data'
## The following object is masked from 'package:dplyr':
## 
##     mutate_each
## The following objects are masked from 'package:lubridate':
## 
##     month, wday
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
data(titanic)

Mutate Survival as a binary variable

titanic <- titanic %>%
  mutate(surv = as.integer(survived)) %>% 
  mutate(survival = sjmisc::rec(surv, rec = "2=0; 1=1")) %>% 
  select(pclass, survived, survival, everything())
## Warning: package 'bindrcpp' was built under R version 3.4.3

The Regression result

z.tit<- zlogit$new()
z.tit$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(z.tit)
## Model: 
## 
## Call:
## z.tit$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0634  -0.6641  -0.4943   0.4336   2.4941  
## 
## Coefficients:
##                     Estimate Std. Error z value            Pr(>|z|)
## (Intercept)        4.8978215  0.6131092   7.988 0.00000000000000137
## age               -0.0387245  0.0068044  -5.691 0.00000001262563680
## sexmale           -3.8996177  0.5015659  -7.775 0.00000000000000755
## pclass2nd         -1.5923247  0.6024844  -2.643             0.00822
## pclass3rd         -4.1382715  0.5601819  -7.387 0.00000000000014976
## fare              -0.0009058  0.0020509  -0.442             0.65874
## sexmale:pclass2nd -0.0600076  0.6372949  -0.094             0.92498
## sexmale:pclass3rd  2.5019110  0.5479901   4.566 0.00000498035051247
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1409.99  on 1042  degrees of freedom
## Residual deviance:  931.45  on 1035  degrees of freedom
## AIC: 947.45
## 
## Number of Fisher Scoring iterations: 5
## 
## Next step: Use 'setx' method

Age in Zelig5

z.age <-zlogit$new()
z.age$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(z.age)
## Model: 
## 
## Call:
## z.age$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0634  -0.6641  -0.4943   0.4336   2.4941  
## 
## Coefficients:
##                     Estimate Std. Error z value            Pr(>|z|)
## (Intercept)        4.8978215  0.6131092   7.988 0.00000000000000137
## age               -0.0387245  0.0068044  -5.691 0.00000001262563680
## sexmale           -3.8996177  0.5015659  -7.775 0.00000000000000755
## pclass2nd         -1.5923247  0.6024844  -2.643             0.00822
## pclass3rd         -4.1382715  0.5601819  -7.387 0.00000000000014976
## fare              -0.0009058  0.0020509  -0.442             0.65874
## sexmale:pclass2nd -0.0600076  0.6372949  -0.094             0.92498
## sexmale:pclass3rd  2.5019110  0.5479901   4.566 0.00000498035051247
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1409.99  on 1042  degrees of freedom
## Residual deviance:  931.45  on 1035  degrees of freedom
## AIC: 947.45
## 
## Number of Fisher Scoring iterations: 5
## 
## Next step: Use 'setx' method

Plotting Age using zelig5

z.age$setrange(age = min(titanic$age):max(titanic$age))
z.age$sim()
ci.plot(z.age)

Fare effect using Zelig5

z.fare <-zlogit$new()
z.fare$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(z.fare)
## Model: 
## 
## Call:
## z.fare$zelig(formula = survival ~ age + sex * pclass + fare, 
##     data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0634  -0.6641  -0.4943   0.4336   2.4941  
## 
## Coefficients:
##                     Estimate Std. Error z value            Pr(>|z|)
## (Intercept)        4.8978215  0.6131092   7.988 0.00000000000000137
## age               -0.0387245  0.0068044  -5.691 0.00000001262563680
## sexmale           -3.8996177  0.5015659  -7.775 0.00000000000000755
## pclass2nd         -1.5923247  0.6024844  -2.643             0.00822
## pclass3rd         -4.1382715  0.5601819  -7.387 0.00000000000014976
## fare              -0.0009058  0.0020509  -0.442             0.65874
## sexmale:pclass2nd -0.0600076  0.6372949  -0.094             0.92498
## sexmale:pclass3rd  2.5019110  0.5479901   4.566 0.00000498035051247
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1409.99  on 1042  degrees of freedom
## Residual deviance:  931.45  on 1035  degrees of freedom
## AIC: 947.45
## 
## Number of Fisher Scoring iterations: 5
## 
## Next step: Use 'setx' method

Plotting Fare using Zelig 5

z.fare$setrange(fare = min(titanic$fare):max(titanic$fare))
z.fare$sim()
ci.plot(z.fare)

##Sex Effect

z.tit$setx(sex = "male")
z.tit$setx1(sex = "female")
z.tit$sim()
summary(z.tit)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.1391369 0.01928226 0.1378931 0.1052618 0.1809891
## pv
##          0     1
## [1,] 0.856 0.144
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.3944577 0.04327657 0.3933851 0.3126716 0.4810452
## pv
##          0     1
## [1,] 0.608 0.392
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.2553207 0.04281491 0.2532763 0.1783481 0.3398471

First Difference

fd <- z.tit$get_qi(xvalue="x1", qi="fd")
summary(fd)
##        V1        
##  Min.   :0.1369  
##  1st Qu.:0.2240  
##  Median :0.2533  
##  Mean   :0.2553  
##  3rd Qu.:0.2831  
##  Max.   :0.4272

Plot

plot(z.tit)

Sex and Class using Zelig5

First Class

z_firstclass <- zlogit$new()
z_firstclass$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z_firstclass$setx(sex = "male", pclass = "1st" )
z_firstclass$setx1(sex = "female", pclass = "1st")
z_firstclass$sim()
summary(z_firstclass)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%    97.5%
## [1,] 0.4536297 0.04836853 0.4516794 0.3618779 0.555129
## pv
##          0     1
## [1,] 0.548 0.452
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.9732738 0.01318054 0.9758825 0.9409922 0.9910386
## pv
##          0     1
## [1,] 0.026 0.974
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.5196441 0.04761039 0.5202896 0.4221995 0.6090908

Second Class

z_secondclass <- zlogit$new()
z_secondclass$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z_secondclass$setx(sex = "male", pclass = "2nd")
z_secondclass$setx1(sex =  "female", pclass = "2nd")
z_secondclass$sim()
summary(z_secondclass)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.1394437 0.02829056 0.1371306 0.0908669 0.2065318
## pv
##          0     1
## [1,] 0.851 0.149
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.8896451 0.03102167 0.8934756 0.8180124 0.9404066
## pv
##          0     1
## [1,] 0.115 0.885
## fd
##           mean         sd     50%      2.5%     97.5%
## [1,] 0.7502014 0.04285078 0.75379 0.6587655 0.8234875

Third Class

z_thirdclass <- zlogit$new()
z_thirdclass$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z_thirdclass$setx(sex = "male", pclass = "3rd")
z_thirdclass$setx1(sex =  "female", pclass = "3rd")
z_thirdclass$sim()
summary(z_thirdclass)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.1402621 0.01953624 0.1388814 0.1062911 0.1810555
## pv
##          0     1
## [1,] 0.876 0.124
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.3934155 0.04322259 0.3928017 0.3088898 0.4786633
## pv
##          0     1
## [1,] 0.591 0.409
## fd
##           mean         sd       50%     2.5%     97.5%
## [1,] 0.2531535 0.04397348 0.2523339 0.168412 0.3419572

Plotting Classes & Sex

First Class

plot(z_firstclass)

Second Class

plot(z_secondclass)

Third Class

plot(z_thirdclass)

Combined Plots

d1 <- z_firstclass$get_qi(xvalue="x1", qi="fd")
d2 <- z_secondclass$get_qi(xvalue="x1", qi="fd")
d3 <- z_thirdclass$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
library(tidyr)
titdf <- dfd %>% 
  gather(class, simv, 1:3)
head(titdf)
library(dplyr)
titdf %>% 
  group_by(class) %>% 
  summarise(mean = mean(simv), sd = sd(simv))
library(ggplot2)
ggplot(titdf, aes(simv)) + geom_histogram() + facet_grid(~class)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.