Safiya Stewart

Sociology 712

Professor Songe

3/22/19

#install.packages("sjmisc")

library(magrittr)
library(tidyverse)
library(readr)
library(knitr)
library(Zelig)
library(radiant.data)

data("titanic")
head(titanic)
## # A tibble: 6 x 10
##   pclass survived sex       age sibsp parch  fare name       cabin embarked
##   <fct>  <fct>    <fct>   <dbl> <int> <int> <dbl> <chr>      <chr> <fct>   
## 1 1st    Yes      female 29         0     0 211.  Allen, Mi… B5    Southam…
## 2 1st    Yes      male    0.917     1     2 152.  Allison, … C22 … Southam…
## 3 1st    No       female  2         1     2 152.  Allison, … C22 … Southam…
## 4 1st    No       male   30         1     2 152.  Allison, … C22 … Southam…
## 5 1st    No       female 25         1     2 152.  Allison, … C22 … Southam…
## 6 1st    Yes      male   48         0     0  26.5 Anderson,… E12   Southam…

MUTATION OF VARIABLE SURV INTO SURVIVAL

titanic <- titanic %>%
  mutate(surv = as.integer(survived)) %>% 
  mutate(survival = sjmisc::rec(surv, rec = "2=0; 1=1")) %>% 
  select(pclass, survived, survival, everything())
head(titanic)
## # A tibble: 6 x 12
##   pclass survived survival sex      age sibsp parch  fare name  cabin
##   <fct>  <fct>       <dbl> <fct>  <dbl> <int> <int> <dbl> <chr> <chr>
## 1 1st    Yes             1 fema… 29         0     0 211.  Alle… B5   
## 2 1st    Yes             1 male   0.917     1     2 152.  Alli… C22 …
## 3 1st    No              0 fema…  2         1     2 152.  Alli… C22 …
## 4 1st    No              0 male  30         1     2 152.  Alli… C22 …
## 5 1st    No              0 fema… 25         1     2 152.  Alli… C22 …
## 6 1st    Yes             1 male  48         0     0  26.5 Ande… E12  
## # … with 2 more variables: embarked <fct>, surv <int>

IMPLEMENTATION OF ZELIG 5 SYNTAX TO HIGHLIGHT NEWLY CREATED DICHOTOMOUS VARIABLE SURVIVAL

z5 <- zlogit$new()
z5$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(z5)
## Model: 
## 
## Call:
## z5$zelig(formula = survival ~ age + sex * pclass + fare, data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0634  -0.6641  -0.4943   0.4336   2.4941  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)
## (Intercept)        4.8978215  0.6131092   7.988 1.37e-15
## age               -0.0387245  0.0068044  -5.691 1.26e-08
## sexmale           -3.8996177  0.5015659  -7.775 7.55e-15
## pclass2nd         -1.5923247  0.6024844  -2.643  0.00822
## pclass3rd         -4.1382715  0.5601819  -7.387 1.50e-13
## fare              -0.0009058  0.0020509  -0.442  0.65874
## sexmale:pclass2nd -0.0600076  0.6372949  -0.094  0.92498
## sexmale:pclass3rd  2.5019110  0.5479901   4.566 4.98e-06
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1409.99  on 1042  degrees of freedom
## Residual deviance:  931.45  on 1035  degrees of freedom
## AIC: 947.45
## 
## Number of Fisher Scoring iterations: 5
## 
## Next step: Use 'setx' method

EFFECT OF AGE

z5$setrange(age=min(titanic$age):max(titanic$age))
z5$sim()
z5$graph()

EFFECT OF FARE

z5$setrange(fare=min(titanic$fare):max(titanic$fare))
z5$sim()
z5$graph()

SEX DIFFERENCE

z.sex <- zlogit$new()
z.sex$zelig(survival ~ age + sex*pclass + fare,  data = titanic)
z.sex$setx(sex = "male")
z.sex$setx1(sex = "female")
z.sex$sim()
summary(z.sex)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.1406593 0.01970743 0.1397259 0.1064106 0.1817742
## pv
##          0     1
## [1,] 0.851 0.149
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%     2.5%     97.5%
## [1,] 0.3968774 0.04676838 0.3964406 0.307353 0.4928045
## pv
##          0     1
## [1,] 0.597 0.403
## fd
##           mean         sd       50%     2.5%     97.5%
## [1,] 0.2562181 0.04644506 0.2562792 0.163559 0.3515011

FIRST DIFFERENCE BETWEEN THE SEXES

fd <- z.sex$get_qi(xvalue="x1", qi="fd")
summary(fd)
##        V1         
##  Min.   :0.09978  
##  1st Qu.:0.22687  
##  Median :0.25628  
##  Mean   :0.25622  
##  3rd Qu.:0.28798  
##  Max.   :0.42362

SEX DIFFERENCES BY CLASS

First Class passengers

z5cl <- zlogit$new()
z5cl$zelig(survival ~ age + sex*pclass + fare,  data = titanic)
z5cl$setx(sex = "male", pclass = "1st")
z5cl$setx1(sex = "female", pclass = "1st")
z5cl$sim()
summary(z5cl)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.4521426 0.04947199 0.4517859 0.3567711 0.5489564
## pv
##          0     1
## [1,] 0.554 0.446
## 
##  sim x1 :
##  -----
## ev
##           mean        sd       50%      2.5%     97.5%
## [1,] 0.9734146 0.0134011 0.9759893 0.9393136 0.9908709
## pv
##          0     1
## [1,] 0.027 0.973
## fd
##          mean         sd       50%      2.5%     97.5%
## [1,] 0.521272 0.04854888 0.5224354 0.4256914 0.6168008
plot(z5cl)

Second Class passengers

z5cl2 <- zlogit$new()
z5cl2$zelig(survival ~ age + sex*pclass + fare,  data = titanic)
z5cl2$setx(sex = "male", pclass = "2nd")
z5cl2$setx1(sex = "female", pclass = "2nd")
z5cl2$sim()
summary(z5cl2)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%       2.5%     97.5%
## [1,] 0.1388147 0.02808807 0.1374725 0.09066829 0.2018672
## pv
##          0     1
## [1,] 0.861 0.139
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.8881749 0.03249202 0.8911112 0.8148349 0.9409201
## pv
##          0     1
## [1,] 0.115 0.885
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.7493602 0.04324667 0.7527703 0.6595137 0.8273213
plot(z5cl2)

Third class passengers

z5cl3 <- zlogit$new()
z5cl3$zelig(survival ~ age + sex*pclass + fare,  data = titanic)
z5cl3$setx(sex = "male", pclass = "3rd")
z5cl3$setx1(sex = "female", pclass = "3rd")
z5cl3$sim()
summary(z5cl3)
## 
##  sim x :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.1406689 0.02033217 0.1393531 0.1058836 0.1838825
## pv
##         0    1
## [1,] 0.85 0.15
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.3947143 0.04409182 0.3959683 0.3086232 0.4808728
## pv
##          0     1
## [1,] 0.587 0.413
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.2540453 0.04361054 0.2561075 0.1662824 0.3379693
plot(z5cl3)

PUTTING THE DATA IN ONE FRAME

d1<-z5cl$get_qi(xvalue="x1", qi="fd")
d2<-z5cl2$get_qi(xvalue="x1", qi="fd")
d3<-z5cl3$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
##          V1        V2        V3
## 1 0.5372015 0.7770912 0.2068092
## 2 0.5164485 0.7382971 0.2154375
## 3 0.5948568 0.6769464 0.2196744
## 4 0.4577496 0.8144584 0.2197770
## 5 0.5903832 0.7186659 0.2406991
## 6 0.5280015 0.7123962 0.2946334

__

library(tidyr)

tidd <- dfd %>% 
  gather(class, simv, 1:3)
head(tidd)
##   class      simv
## 1    V1 0.5372015
## 2    V1 0.5164485
## 3    V1 0.5948568
## 4    V1 0.4577496
## 5    V1 0.5903832
## 6    V1 0.5280015

HOW CLOSE TO THE MEAN IS THE SAMPLING DISTRIBUTION

library(dplyr)
tidd %>% 
group_by(class) %>% 
summarise(mean = mean(simv), sd = sd(simv))
## # A tibble: 3 x 3
##   class  mean     sd
##   <chr> <dbl>  <dbl>
## 1 V1    0.521 0.0485
## 2 V2    0.749 0.0432
## 3 V3    0.254 0.0436

GG PLOT ANALYSIS

library(ggplot2)
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)