Library Setup

Loading Packages

Objective

The goal of this first assignment is to reimplement the code from slides 5.2-5.18 of lecture 6 using Zelig 5. In the lecture notes Zelig 4 was used, however for this assignment the code will be repeated but using zelig 5. Since simulations are involved, some of the numbers might vary.

Loading the Data, Selecting the Variables, and Cleaning the Data

##   pclass survived survival    sex     age sibsp parch     fare                            name   cabin    embarked surv
## 1    1st      Yes        1 female 29.0000     0     0 211.3375   Allen, Miss. Elisabeth Walton      B5 Southampton    1
## 2    1st      Yes        1   male  0.9167     1     2 151.5500  Allison, Master. Hudson Trevor C22 C26 Southampton    1
## 3    1st       No        0 female  2.0000     1     2 151.5500    Allison, Miss. Helen Loraine C22 C26 Southampton    2
## 4    1st       No        0   male 30.0000     1     2 151.5500 Allison, Mr. Hudson Joshua Crei C22 C26 Southampton    2
## 5    1st       No        0 female 25.0000     1     2 151.5500 Allison, Mrs. Hudson J C (Bessi C22 C26 Southampton    2
## 6    1st      Yes        1   male 48.0000     0     0  26.5500             Anderson, Mr. Harry     E12 Southampton    1

Preview Titanic Statistics

##  pclass    survived     survival          sex           age              sibsp            parch             fare            name              cabin                  embarked        surv      
##  1st:282   Yes:425   Min.   :0.0000   female:386   Min.   : 0.1667   Min.   :0.0000   Min.   :0.0000   Min.   :  0.00   Length:1043        Length:1043        Cherbourg  :212   Min.   :1.000  
##  2nd:261   No :618   1st Qu.:0.0000   male  :657   1st Qu.:21.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:  8.05   Class :character   Class :character   Queenstown : 50   1st Qu.:1.000  
##  3rd:500             Median :0.0000                Median :28.0000   Median :0.0000   Median :0.0000   Median : 15.75   Mode  :character   Mode  :character   Southampton:781   Median :2.000  
##                      Mean   :0.4075                Mean   :29.8132   Mean   :0.5043   Mean   :0.4219   Mean   : 36.60                                                           Mean   :1.593  
##                      3rd Qu.:1.0000                3rd Qu.:39.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.: 35.08                                                           3rd Qu.:2.000  
##                      Max.   :1.0000                Max.   :80.0000   Max.   :8.0000   Max.   :6.0000   Max.   :512.33                                                           Max.   :2.000

Interaction Model Using Zelig 5

z.out1 <- zlogit$new()
z.out1$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(z.out1)
## Model: 
## 
## Call:
## z.out1$zelig(formula = survival ~ age + sex * pclass + fare, 
##     data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0634  -0.6641  -0.4943   0.4336   2.4941  
## 
## Coefficients:
##                     Estimate Std. Error z value            Pr(>|z|)
## (Intercept)        4.8978215  0.6131092   7.988 0.00000000000000137
## age               -0.0387245  0.0068044  -5.691 0.00000001262563680
## sexmale           -3.8996177  0.5015659  -7.775 0.00000000000000755
## pclass2nd         -1.5923247  0.6024844  -2.643             0.00822
## pclass3rd         -4.1382715  0.5601819  -7.387 0.00000000000014976
## fare              -0.0009058  0.0020509  -0.442             0.65874
## sexmale:pclass2nd -0.0600076  0.6372949  -0.094             0.92498
## sexmale:pclass3rd  2.5019110  0.5479901   4.566 0.00000498035051247
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1409.99  on 1042  degrees of freedom
## Residual deviance:  931.45  on 1035  degrees of freedom
## AIC: 947.45
## 
## Number of Fisher Scoring iterations: 5
## 
## Next step: Use 'setx' method

Age Effect Using Zelig 5

z.out1$setrange(age=0:80)
z.out1$sim()
z.out1$graph()

##Fare Effect Using Zelig 5 The maximum fare value was acquired from the statistics above.

z.out1$setrange(fare=0:513)
z.out1$sim()
z.out1$graph()

##Sex Differences and Simulated First Difference Using Zelig 5

z.out1$setx(sex="male")
z.out1$setx1(sex="female")
z.out1$sim()
fd <- z.out1$get_qi(xvalue="x1", qi="fd")
summary(fd)
##        V1        
##  Min.   :0.1332  
##  1st Qu.:0.2222  
##  Median :0.2537  
##  Mean   :0.2543  
##  3rd Qu.:0.2852  
##  Max.   :0.4144
plot(z.out1)

Testing Class Variations in Sex Difference Using Zelig 5

1st Class

z5.1st <- zlogit$new()
z5.1st$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.1st$setx(sex = "male", pclass = "1st") 
z5.1st$setx1(sex = "female", pclass = "1st")
z5.1st$sim()
plot(z5.1st)

Second Class

z5.2nd <- zlogit$new()
z5.2nd$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.2nd$setx(sex = "male", pclass = "2nd") 
z5.2nd$setx1(sex = "female", pclass = "2nd")
z5.2nd$sim()
plot(z5.2nd)

Third Class

z5.3rd <- zlogit$new()
z5.3rd$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5.3rd$setx(sex = "male", pclass = "3rd") 
z5.3rd$setx1(sex = "female", pclass = "3rd")
z5.3rd$sim()
plot(z5.3rd)

Putting FDs Into One Table

d1 <- z5.1st$get_qi(xvalue="x1", qi="fd")
d2 <- z5.2nd$get_qi(xvalue="x1", qi="fd")
d3 <- z5.3rd$get_qi(xvalue="x1", qi="fd")
  
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
##          V1        V2        V3
## 1 0.4762803 0.6759499 0.2606452
## 2 0.4145390 0.7404043 0.2659030
## 3 0.4738715 0.8000504 0.1981734
## 4 0.5524660 0.6782987 0.3436022
## 5 0.5218583 0.7287588 0.2614466
## 6 0.5704955 0.7261306 0.1809074

Sort by Class

tidd <- dfd %>% 
  gather(class, simv, 1:3)
head(tidd)
##   class      simv
## 1    V1 0.4762803
## 2    V1 0.4145390
## 3    V1 0.4738715
## 4    V1 0.5524660
## 5    V1 0.5218583
## 6    V1 0.5704955

Sort by Group

tidd %>% 
  group_by(class) %>% 
  summarise(mean = mean(simv), sd = sd(simv))
## # A tibble: 3 x 3
##   class  mean     sd
##   <chr> <dbl>  <dbl>
## 1 V1    0.521 0.0497
## 2 V2    0.749 0.0444
## 3 V3    0.255 0.0444

Sort by Plot

ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)