#Assignment#6.1: Re-implementation of Slide 5.2 to 5.18 using Zelig 5 syntax

library(Zelig)
## Loading required package: survival
library(DAAG)
## Loading required package: lattice
## 
## Attaching package: 'DAAG'
## The following object is masked from 'package:survival':
## 
##     lung
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
## ✔ tibble  1.4.2     ✔ dplyr   0.7.4
## ✔ tidyr   0.8.0     ✔ stringr 1.2.0
## ✔ readr   1.1.1     ✔ forcats 0.2.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ purrr::reduce() masks Zelig::reduce()
library(dplyr)
library(tidyr)
library(ggplot2)
library(survival)
library(radiant.data)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
## 
## Attaching package: 'radiant.data'
## The following objects are masked from 'package:lubridate':
## 
##     month, wday
## The following object is masked from 'package:forcats':
## 
##     as_factor
## The following object is masked from 'package:dplyr':
## 
##     mutate_each
## The following object is masked from 'package:purrr':
## 
##     is_empty
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
data(titanic)
head(titanic)
##   pclass survived    sex     age sibsp parch     fare                            name   cabin    embarked
## 1    1st      Yes female 29.0000     0     0 211.3375   Allen, Miss. Elisabeth Walton      B5 Southampton
## 2    1st      Yes   male  0.9167     1     2 151.5500  Allison, Master. Hudson Trevor C22 C26 Southampton
## 3    1st       No female  2.0000     1     2 151.5500    Allison, Miss. Helen Loraine C22 C26 Southampton
## 4    1st       No   male 30.0000     1     2 151.5500 Allison, Mr. Hudson Joshua Crei C22 C26 Southampton
## 5    1st       No female 25.0000     1     2 151.5500 Allison, Mrs. Hudson J C (Bessi C22 C26 Southampton
## 6    1st      Yes   male 48.0000     0     0  26.5500             Anderson, Mr. Harry     E12 Southampton
titanic <- titanic %>% 
  mutate(survived1 = as.integer(survived)) %>%
  mutate (age = as.integer(age)) %>%
  mutate(survival = sjmisc::rec(survived1, rec = "2=0; 1=1")) %>%
  select(survived, survival, age, sex, pclass, fare)
head(titanic)
##   survived survival age    sex pclass     fare
## 1      Yes        1  29 female    1st 211.3375
## 2      Yes        1   0   male    1st 151.5500
## 3       No        0   2 female    1st 151.5500
## 4       No        0  30   male    1st 151.5500
## 5       No        0  25 female    1st 151.5500
## 6      Yes        1  48   male    1st  26.5500
max(titanic$age)
## [1] 80
z5 <- zlogit$new()
z5$zelig(survival ~ age + sex*pclass + fare,data = titanic)
summary(z5)
## Model: 
## 
## Call:
## z5$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0628  -0.6636  -0.4941   0.4337   2.4940  
## 
## Coefficients:
##                     Estimate Std. Error z value            Pr(>|z|)
## (Intercept)        4.8959649  0.6128145   7.989 0.00000000000000136
## age               -0.0386781  0.0067926  -5.694 0.00000001239977237
## sexmale           -3.9001038  0.5015680  -7.776 0.00000000000000750
## pclass2nd         -1.5922712  0.6024689  -2.643             0.00822
## pclass3rd         -4.1381922  0.5601346  -7.388 0.00000000000014922
## fare              -0.0009074  0.0020510  -0.442             0.65820
## sexmale:pclass2nd -0.0603255  0.6373047  -0.095             0.92459
## sexmale:pclass3rd  2.5016703  0.5479814   4.565 0.00000498908018340
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1409.99  on 1042  degrees of freedom
## Residual deviance:  931.42  on 1035  degrees of freedom
## AIC: 947.42
## 
## Number of Fisher Scoring iterations: 5
## 
## Next step: Use 'setx' method

Age effect

summary(titanic$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   21.00   28.00   29.79   39.00   80.00
z5$setrange(age = 0:80)
z5$sim()
z5$graph()

#Fare effect

summary(titanic$fare)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    8.05   15.75   36.60   35.08  512.33
z5$setrange(fare = 0:512.33)
z5$sim()
z5$graph()

Sex difference

z5.sex <- zlogit$new()
z5.sex$zelig(survival ~ age + sex*pclass + fare, data = titanic)
  z5.sex$setx(sex = "male") 
  z5.sex$setx1(sex = "female")
  z5.sex$sim()
  summary(z5.sex)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.1393829 0.01928101 0.1388332 0.1056682 0.1789379
## pv
##          0     1
## [1,] 0.866 0.134
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.3951089 0.04185373 0.3941491 0.3132035 0.4800687
## pv
##          0     1
## [1,] 0.596 0.404
## fd
##          mean         sd       50%      2.5%     97.5%
## [1,] 0.255726 0.04353896 0.2547804 0.1734585 0.3402523
fd <- z5.sex$get_qi (xvalue = "x1", qi = "fd")
summary(fd)
##        V1        
##  Min.   :0.1100  
##  1st Qu.:0.2269  
##  Median :0.2548  
##  Mean   :0.2557  
##  3rd Qu.:0.2849  
##  Max.   :0.3863
plot(z5.sex)

How to test the class variations in sex difference using Zelig?

First class

z5.c1s <- zlogit$new()
z5.c1s$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.c1s$setx(sex = "male", pclass = "1st") 
z5.c1s$setx1(sex = "female", pclass = "1st")
z5.c1s$sim()

Second class

z5.c2s <- zlogit$new()
z5.c2s$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.c2s$setx(sex = "male", pclass = "2nd")
z5.c2s$setx1(sex = "female", pclass = "2nd")
z5.c2s$sim()

Third class

z5.c3s <- zlogit$new()
z5.c3s$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.c3s$setx(sex = "male", pclass = "3rd")
z5.c3s$setx1(sex = "female", pclass = "3rd")
z5.c3s$sim()
plot(z5.c1s)

plot(z5.c2s)

plot(z5.c3s)

Putting them in one place

d1 <- z5.c1s$get_qi(xvalue="x1", qi="fd")
d2 <- z5.c2s$get_qi(xvalue="x1", qi="fd")
d3 <- z5.c3s$get_qi(xvalue="x1", qi="fd")
  
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
##          V1        V2        V3
## 1 0.6268752 0.7379985 0.2538173
## 2 0.6119724 0.8209339 0.2662584
## 3 0.5880165 0.7573147 0.2862220
## 4 0.4676590 0.7817792 0.2528908
## 5 0.4940628 0.7512419 0.2061413
## 6 0.4770414 0.8014236 0.3001763
tidd <- dfd %>% 
  gather(class, simv, 1:3)
head(tidd)
##   class      simv
## 1    V1 0.6268752
## 2    V1 0.6119724
## 3    V1 0.5880165
## 4    V1 0.4676590
## 5    V1 0.4940628
## 6    V1 0.4770414
library(dplyr)

tidd %>% 
  group_by(class) %>% 
  summarise(mean = mean(simv), sd = sd(simv))
## # A tibble: 3 x 3
##   class  mean     sd
##   <chr> <dbl>  <dbl>
## 1 V1    0.517 0.0500
## 2 V2    0.750 0.0435
## 3 V3    0.255 0.0420
library(ggplot2)

ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.