Part I:

This will consist of recreating slides 5.2 through 5.18 using Zelig 5 syntax.

Importing Libraries & Data

library(dplyr)
library(Zelig)
library(texreg)
library(visreg)
library(ggplot2)
library(radiant.data)
data("titanic")
head(titanic)
## # A tibble: 6 x 10
##   pclass survived sex       age sibsp parch  fare name       cabin embarked
##   <fct>  <fct>    <fct>   <dbl> <int> <int> <dbl> <chr>      <chr> <fct>   
## 1 1st    Yes      female 29         0     0 211.  Allen, Mi… B5    Southam…
## 2 1st    Yes      male    0.917     1     2 152.  Allison, … C22 … Southam…
## 3 1st    No       female  2         1     2 152.  Allison, … C22 … Southam…
## 4 1st    No       male   30         1     2 152.  Allison, … C22 … Southam…
## 5 1st    No       female 25         1     2 152.  Allison, … C22 … Southam…
## 6 1st    Yes      male   48         0     0  26.5 Anderson,… E12   Southam…

Reorganizing the Data

titanic <- titanic %>%
    mutate(surv = as.integer(survived)) %>%
    mutate(survival = sjmisc::rec(surv, rec = "2=0; 1=1")) %>%
    select(pclass, survived, survival, everything())
## Package `snakecase` needs to be installed for case-conversion.

Interaction Model

Slide 5.2
library(Zelig)
z5_titanic <- zlogit$new()
z5_titanic$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(z5_titanic)
## Model: 
## 
## Call:
## z5_titanic$zelig(formula = survival ~ age + sex * pclass + fare, 
##     data = titanic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0634  -0.6641  -0.4943   0.4336   2.4941  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)
## (Intercept)        4.8978215  0.6131092   7.988 1.37e-15
## age               -0.0387245  0.0068044  -5.691 1.26e-08
## sexmale           -3.8996177  0.5015659  -7.775 7.55e-15
## pclass2nd         -1.5923247  0.6024844  -2.643  0.00822
## pclass3rd         -4.1382715  0.5601819  -7.387 1.50e-13
## fare              -0.0009058  0.0020509  -0.442  0.65874
## sexmale:pclass2nd -0.0600076  0.6372949  -0.094  0.92498
## sexmale:pclass3rd  2.5019110  0.5479901   4.566 4.98e-06
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1409.99  on 1042  degrees of freedom
## Residual deviance:  931.45  on 1035  degrees of freedom
## AIC: 947.45
## 
## Number of Fisher Scoring iterations: 5
## 
## Next step: Use 'setx' method

Age Effect

Slide 5.3
z5_titanic$setrange( age = min(titanic$age): max(titanic$age))
z5_titanic$sim()
Slide 5.4
z5_titanic$graph()

Fare Effect

Slide 5.5
z5_titanic$setrange(fare = min(titanic$fare) : max(titanic$fare))
z5_titanic$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
Slide 5.6
par(mar = c(1,1,1,1))
z5_titanic$graph()

Sex Difference

Slides 5.7-5.9
z5_titanic2 <- zlogit$new()
z5_titanic2$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5_titanic2$setx (sex = 'male')
z5_titanic2$setx1 (sex = 'female')
z5_titanic2$sim()
summary(z5_titanic2)
## 
##  sim x :
##  -----
## ev
##           mean        sd       50%      2.5%     97.5%
## [1,] 0.1391428 0.0193732 0.1376529 0.1053389 0.1782404
## pv
##          0     1
## [1,] 0.858 0.142
## 
##  sim x1 :
##  -----
## ev
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.3944476 0.04358307 0.3936464 0.3110566 0.4836436
## pv
##         0    1
## [1,] 0.62 0.38
## fd
##           mean         sd       50%      2.5%     97.5%
## [1,] 0.2553048 0.04380457 0.2532073 0.1732435 0.3433527
fd <- z5_titanic2$get_qi(xvalue = 'x1', qi = 'fd')
summary(fd)
##        V1        
##  Min.   :0.1330  
##  1st Qu.:0.2257  
##  Median :0.2532  
##  Mean   :0.2553  
##  3rd Qu.:0.2861  
##  Max.   :0.4015
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar = c(1,1,1,1))
z5_titanic2$graph()

Slides 5.11-5.14

First Class:

z5_titanic3 <- zlogit$new()
z5_titanic3$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5_titanic3$setx(sex = 'male', pclass = '1st')
z5_titanic3$setx1(sex = 'female', pclass = '1st')
z5_titanic3$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar = c(1,1,1,1))
z5_titanic3$graph()

Second Class:

z5_titanic4 <- zlogit$new()
z5_titanic4$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5_titanic4$setx(sex = 'male', pclass = '2nd')
z5_titanic4$setx1(sex = 'female', pclass = '2nd')
z5_titanic4$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar = c(1,1,1,1))
z5_titanic4$graph()

Third Class:

z5_titanic5 <- zlogit$new()
z5_titanic5$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5_titanic5$setx(sex = 'male', pclass = '3rd')
z5_titanic5$setx1(sex = 'female', pclass = '3rd')
z5_titanic5$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar = c(1,1,1,1))
z5_titanic5$graph()

Slides 5.15-5.18

Putting them in one place:

d1 <- z5_titanic3$get_qi(xvalue = 'x1', qi = 'fd')
d2 <- z5_titanic4$get_qi(xvalue = 'x1', qi = 'fd')
d3 <- z5_titanic5$get_qi(xvalue = 'x1', qi = 'fd')

dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
##          V1        V2        V3
## 1 0.5325618 0.7513733 0.3433314
## 2 0.4660529 0.8019392 0.3231915
## 3 0.4906844 0.7830827 0.2865025
## 4 0.4940944 0.7806258 0.1660193
## 5 0.5724267 0.7335335 0.2226116
## 6 0.5310390 0.7385672 0.3009164
tidd <- dfd %>%
    gather(class, simv, 1:3)
head(tidd)
##   class      simv
## 1    V1 0.5325618
## 2    V1 0.4660529
## 3    V1 0.4906844
## 4    V1 0.4940944
## 5    V1 0.5724267
## 6    V1 0.5310390
tidd %>%
    group_by(class) %>%
    summarize(mean = mean(simv), sd = sd(simv))
## # A tibble: 3 x 3
##   class  mean     sd
##   <chr> <dbl>  <dbl>
## 1 V1    0.519 0.0515
## 2 V2    0.750 0.0423
## 3 V3    0.254 0.0442
ggplot(tidd, aes(simv)) + geom_histogram(fill = "mediumpurple1", color = 'purple') + facet_grid(~class)

Part II:

This will consist of analyzing the “Police Killing” data from GitHub.

This data set consists of the names, ages, genders, states, cities, etc. of individuals killed by police officers in the United States in 2015.

Importing Libraries & Data

library(dplyr)
library(Zelig)
library(texreg)
library(visreg)
library(ggplot2)
library(readr)
library(dplyr)
police_killings <- read_csv("/Users/rachel_ramphal/Documents/police_killings.csv")
police_kills <- mutate(police_killings, genderNew = recode(gender, "Male" = 0, "Female" = 1)) 
police_kill <- select(police_kills, 'age', 'genderNew', 'raceethnicity', 'month', 'state', 'armed', 'cause')

head(police_kill)
## # A tibble: 6 x 7
##   age   genderNew raceethnicity   month    state armed   cause  
##   <chr>     <dbl> <chr>           <chr>    <chr> <chr>   <chr>  
## 1 16            0 Black           February AL    No      Gunshot
## 2 27            0 White           April    LA    No      Gunshot
## 3 26            0 White           March    WI    No      Gunshot
## 4 25            0 Hispanic/Latino March    CA    Firearm Gunshot
## 5 29            0 White           March    OH    No      Gunshot
## 6 29            0 White           March    AZ    No      Gunshot
Describing the variables:
  1. age: The age of the deceased.
  2. genderNew: The gender of the deceased.
  3. raceethnicity: Describes the race/ethnicity of the deceased.
  4. month: The month the individual was killed.
  5. state: The state where the death occurred.
  6. armed: Describes if the deceased was armed or not. If yes, tells what type of weapon (ex: knife, gun, vehicle, etc.).
  7. cause: Describes the cause of death of the individual (ex: gunshot, taser, died in custody, struck by vehicle, etc.).
dim(police_kill)
## [1] 467   7

This shows the total number of observations and variables in my dataset.

Estimation Model

model_1 <- zlogit$new()
model_1$zelig(genderNew ~ age + armed + cause + raceethnicity, data = police_kill)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(model_1)
## Model: 
## 
## Call:
## model_1$zelig(formula = genderNew ~ age + armed + cause + raceethnicity, 
##     data = police_kill)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.28063  -0.28961  -0.00003  -0.00002   2.29190  
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)
## (Intercept)                  -3.896e+01  2.897e+04  -0.001    0.999
## age17                         2.150e+01  1.683e+04   0.001    0.999
## age18                         3.631e-01  1.954e+04   0.000    1.000
## age19                        -6.915e-01  2.088e+04   0.000    1.000
## age20                         1.941e+01  1.683e+04   0.001    0.999
## age21                         7.813e-01  1.929e+04   0.000    1.000
## age22                         1.659e+00  1.876e+04   0.000    1.000
## age23                        -3.904e-03  1.879e+04   0.000    1.000
## age24                         1.977e+01  1.683e+04   0.001    0.999
## age25                         1.971e+01  1.683e+04   0.001    0.999
## age26                         1.943e+01  1.683e+04   0.001    0.999
## age27                         7.013e-01  1.850e+04   0.000    1.000
## age28                         1.927e+01  1.683e+04   0.001    0.999
## age29                         4.949e-01  1.810e+04   0.000    1.000
## age30                         1.867e-01  1.973e+04   0.000    1.000
## age31                         5.348e-01  1.817e+04   0.000    1.000
## age32                         8.381e-01  1.844e+04   0.000    1.000
## age33                         1.058e+00  1.856e+04   0.000    1.000
## age34                         2.024e+01  1.683e+04   0.001    0.999
## age35                         6.410e-01  1.812e+04   0.000    1.000
## age36                         3.676e-01  1.823e+04   0.000    1.000
## age37                         2.007e+01  1.683e+04   0.001    0.999
## age38                         1.995e+01  1.683e+04   0.001    0.999
## age39                         2.006e+01  1.683e+04   0.001    0.999
## age40                         4.583e-01  1.863e+04   0.000    1.000
## age41                        -2.741e-02  1.850e+04   0.000    1.000
## age42                         7.972e-01  1.853e+04   0.000    1.000
## age43                         2.038e+01  1.683e+04   0.001    0.999
## age44                         2.342e-01  2.129e+04   0.000    1.000
## age45                         4.025e-01  1.940e+04   0.000    1.000
## age46                         2.027e+01  1.683e+04   0.001    0.999
## age47                         1.263e-01  1.879e+04   0.000    1.000
## age48                        -5.559e-01  2.104e+04   0.000    1.000
## age49                         2.020e+01  1.683e+04   0.001    0.999
## age50                         6.873e-01  2.219e+04   0.000    1.000
## age51                         5.253e-01  1.952e+04   0.000    1.000
## age52                         8.880e-01  2.091e+04   0.000    1.000
## age53                         1.986e+01  1.683e+04   0.001    0.999
## age54                         5.612e-01  1.993e+04   0.000    1.000
## age55                         8.308e-01  2.662e+04   0.000    1.000
## age56                        -3.830e-01  2.160e+04   0.000    1.000
## age57                         1.427e-01  2.028e+04   0.000    1.000
## age58                         3.218e-01  2.229e+04   0.000    1.000
## age59                        -1.925e-02  2.116e+04   0.000    1.000
## age60                         4.262e-01  2.379e+04   0.000    1.000
## age61                         5.076e-01  2.658e+04   0.000    1.000
## age62                        -1.264e+00  2.552e+04   0.000    1.000
## age63                        -4.438e-01  2.169e+04   0.000    1.000
## age64                         1.953e+01  1.683e+04   0.001    0.999
## age67                         3.369e-01  3.373e+04   0.000    1.000
## age68                         1.210e+00  2.488e+04   0.000    1.000
## age69                        -1.603e+00  3.373e+04   0.000    1.000
## age71                        -1.685e+00  3.373e+04   0.000    1.000
## age72                        -1.025e+00  2.595e+04   0.000    1.000
## age74                        -1.103e-01  3.373e+04   0.000    1.000
## age75                         7.861e-01  3.373e+04   0.000    1.000
## age76                         7.861e-01  3.373e+04   0.000    1.000
## age77                        -1.713e-01  3.373e+04   0.000    1.000
## age83                         2.759e-01  3.373e+04   0.000    1.000
## age87                        -1.542e+00  3.373e+04   0.000    1.000
## ageUnknown                    2.087e+01  1.683e+04   0.001    0.999
## armedFirearm                 -6.822e-01  2.271e+04   0.000    1.000
## armedKnife                   -1.131e+00  2.271e+04   0.000    1.000
## armedNo                      -6.212e-01  2.271e+04   0.000    1.000
## armedNon-lethal firearm      -1.535e+00  2.271e+04   0.000    1.000
## armedOther                   -1.989e+01  2.318e+04  -0.001    0.999
## armedUnknown                 -1.863e+01  2.449e+04  -0.001    0.999
## armedVehicle                 -7.938e-01  2.271e+04   0.000    1.000
## causeGunshot                  1.778e+01  6.322e+03   0.003    0.998
## causeStruck by vehicle        1.966e+01  6.322e+03   0.003    0.998
## causeTaser                    1.725e+01  6.322e+03   0.003    0.998
## causeUnknown                 -1.958e-01  1.478e+04   0.000    1.000
## raceethnicityBlack            4.098e-01  1.382e+00   0.297    0.767
## raceethnicityHispanic/Latino -1.224e+00  1.710e+00  -0.716    0.474
## raceethnicityNative American -1.867e+01  1.348e+04  -0.001    0.999
## raceethnicityUnknown          1.984e+00  1.837e+00   1.080    0.280
## raceethnicityWhite           -3.742e-02  1.340e+00  -0.028    0.978
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 177.38  on 466  degrees of freedom
## Residual deviance: 113.47  on 390  degrees of freedom
## AIC: 267.47
## 
## Number of Fisher Scoring iterations: 20
## 
## Next step: Use 'setx' method

This is a regression model that estimates the correlation between gender and age, whether the deceased was armed, their cause of death, and their race/ethnicity. The table is not the best as it lists each age separately, it makes it difficult to determine a relationship between gender and age.