This will consist of recreating slides 5.2 through 5.18 using Zelig 5 syntax.
library(dplyr)
library(Zelig)
library(texreg)
library(visreg)
library(ggplot2)
library(radiant.data)
data("titanic")
head(titanic)
## # A tibble: 6 x 10
## pclass survived sex age sibsp parch fare name cabin embarked
## <fct> <fct> <fct> <dbl> <int> <int> <dbl> <chr> <chr> <fct>
## 1 1st Yes female 29 0 0 211. Allen, Mi… B5 Southam…
## 2 1st Yes male 0.917 1 2 152. Allison, … C22 … Southam…
## 3 1st No female 2 1 2 152. Allison, … C22 … Southam…
## 4 1st No male 30 1 2 152. Allison, … C22 … Southam…
## 5 1st No female 25 1 2 152. Allison, … C22 … Southam…
## 6 1st Yes male 48 0 0 26.5 Anderson,… E12 Southam…
titanic <- titanic %>%
mutate(surv = as.integer(survived)) %>%
mutate(survival = sjmisc::rec(surv, rec = "2=0; 1=1")) %>%
select(pclass, survived, survival, everything())
## Package `snakecase` needs to be installed for case-conversion.
library(Zelig)
z5_titanic <- zlogit$new()
z5_titanic$zelig(survival ~ age + sex*pclass + fare, data = titanic)
summary(z5_titanic)
## Model:
##
## Call:
## z5_titanic$zelig(formula = survival ~ age + sex * pclass + fare,
## data = titanic)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0634 -0.6641 -0.4943 0.4336 2.4941
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.8978215 0.6131092 7.988 1.37e-15
## age -0.0387245 0.0068044 -5.691 1.26e-08
## sexmale -3.8996177 0.5015659 -7.775 7.55e-15
## pclass2nd -1.5923247 0.6024844 -2.643 0.00822
## pclass3rd -4.1382715 0.5601819 -7.387 1.50e-13
## fare -0.0009058 0.0020509 -0.442 0.65874
## sexmale:pclass2nd -0.0600076 0.6372949 -0.094 0.92498
## sexmale:pclass3rd 2.5019110 0.5479901 4.566 4.98e-06
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1409.99 on 1042 degrees of freedom
## Residual deviance: 931.45 on 1035 degrees of freedom
## AIC: 947.45
##
## Number of Fisher Scoring iterations: 5
##
## Next step: Use 'setx' method
z5_titanic$setrange( age = min(titanic$age): max(titanic$age))
z5_titanic$sim()
z5_titanic$graph()
z5_titanic$setrange(fare = min(titanic$fare) : max(titanic$fare))
z5_titanic$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar = c(1,1,1,1))
z5_titanic$graph()
z5_titanic2 <- zlogit$new()
z5_titanic2$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5_titanic2$setx (sex = 'male')
z5_titanic2$setx1 (sex = 'female')
z5_titanic2$sim()
summary(z5_titanic2)
##
## sim x :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.1391428 0.0193732 0.1376529 0.1053389 0.1782404
## pv
## 0 1
## [1,] 0.858 0.142
##
## sim x1 :
## -----
## ev
## mean sd 50% 2.5% 97.5%
## [1,] 0.3944476 0.04358307 0.3936464 0.3110566 0.4836436
## pv
## 0 1
## [1,] 0.62 0.38
## fd
## mean sd 50% 2.5% 97.5%
## [1,] 0.2553048 0.04380457 0.2532073 0.1732435 0.3433527
fd <- z5_titanic2$get_qi(xvalue = 'x1', qi = 'fd')
summary(fd)
## V1
## Min. :0.1330
## 1st Qu.:0.2257
## Median :0.2532
## Mean :0.2553
## 3rd Qu.:0.2861
## Max. :0.4015
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar = c(1,1,1,1))
z5_titanic2$graph()
First Class:
z5_titanic3 <- zlogit$new()
z5_titanic3$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5_titanic3$setx(sex = 'male', pclass = '1st')
z5_titanic3$setx1(sex = 'female', pclass = '1st')
z5_titanic3$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar = c(1,1,1,1))
z5_titanic3$graph()
Second Class:
z5_titanic4 <- zlogit$new()
z5_titanic4$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5_titanic4$setx(sex = 'male', pclass = '2nd')
z5_titanic4$setx1(sex = 'female', pclass = '2nd')
z5_titanic4$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar = c(1,1,1,1))
z5_titanic4$graph()
Third Class:
z5_titanic5 <- zlogit$new()
z5_titanic5$zelig(survival ~ age + sex*pclass + fare, data = titanic)
z5_titanic5$setx(sex = 'male', pclass = '3rd')
z5_titanic5$setx1(sex = 'female', pclass = '3rd')
z5_titanic5$sim()
par("mar")
## [1] 5.1 4.1 4.1 2.1
par(mar = c(1,1,1,1))
z5_titanic5$graph()
Putting them in one place:
d1 <- z5_titanic3$get_qi(xvalue = 'x1', qi = 'fd')
d2 <- z5_titanic4$get_qi(xvalue = 'x1', qi = 'fd')
d3 <- z5_titanic5$get_qi(xvalue = 'x1', qi = 'fd')
dfd <- as.data.frame(cbind(d1, d2, d3))
head(dfd)
## V1 V2 V3
## 1 0.5325618 0.7513733 0.3433314
## 2 0.4660529 0.8019392 0.3231915
## 3 0.4906844 0.7830827 0.2865025
## 4 0.4940944 0.7806258 0.1660193
## 5 0.5724267 0.7335335 0.2226116
## 6 0.5310390 0.7385672 0.3009164
tidd <- dfd %>%
gather(class, simv, 1:3)
head(tidd)
## class simv
## 1 V1 0.5325618
## 2 V1 0.4660529
## 3 V1 0.4906844
## 4 V1 0.4940944
## 5 V1 0.5724267
## 6 V1 0.5310390
tidd %>%
group_by(class) %>%
summarize(mean = mean(simv), sd = sd(simv))
## # A tibble: 3 x 3
## class mean sd
## <chr> <dbl> <dbl>
## 1 V1 0.519 0.0515
## 2 V2 0.750 0.0423
## 3 V3 0.254 0.0442
ggplot(tidd, aes(simv)) + geom_histogram(fill = "mediumpurple1", color = 'purple') + facet_grid(~class)
This will consist of analyzing the “Police Killing” data from GitHub.
This data set consists of the names, ages, genders, states, cities, etc. of individuals killed by police officers in the United States in 2015.
library(dplyr)
library(Zelig)
library(texreg)
library(visreg)
library(ggplot2)
library(readr)
library(dplyr)
police_killings <- read_csv("/Users/rachel_ramphal/Documents/police_killings.csv")
police_kills <- mutate(police_killings, genderNew = recode(gender, "Male" = 0, "Female" = 1))
police_kill <- select(police_kills, 'age', 'genderNew', 'raceethnicity', 'month', 'state', 'armed', 'cause')
head(police_kill)
## # A tibble: 6 x 7
## age genderNew raceethnicity month state armed cause
## <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 16 0 Black February AL No Gunshot
## 2 27 0 White April LA No Gunshot
## 3 26 0 White March WI No Gunshot
## 4 25 0 Hispanic/Latino March CA Firearm Gunshot
## 5 29 0 White March OH No Gunshot
## 6 29 0 White March AZ No Gunshot
dim(police_kill)
## [1] 467 7
This shows the total number of observations and variables in my dataset.
model_1 <- zlogit$new()
model_1$zelig(genderNew ~ age + armed + cause + raceethnicity, data = police_kill)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(model_1)
## Model:
##
## Call:
## model_1$zelig(formula = genderNew ~ age + armed + cause + raceethnicity,
## data = police_kill)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.28063 -0.28961 -0.00003 -0.00002 2.29190
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.896e+01 2.897e+04 -0.001 0.999
## age17 2.150e+01 1.683e+04 0.001 0.999
## age18 3.631e-01 1.954e+04 0.000 1.000
## age19 -6.915e-01 2.088e+04 0.000 1.000
## age20 1.941e+01 1.683e+04 0.001 0.999
## age21 7.813e-01 1.929e+04 0.000 1.000
## age22 1.659e+00 1.876e+04 0.000 1.000
## age23 -3.904e-03 1.879e+04 0.000 1.000
## age24 1.977e+01 1.683e+04 0.001 0.999
## age25 1.971e+01 1.683e+04 0.001 0.999
## age26 1.943e+01 1.683e+04 0.001 0.999
## age27 7.013e-01 1.850e+04 0.000 1.000
## age28 1.927e+01 1.683e+04 0.001 0.999
## age29 4.949e-01 1.810e+04 0.000 1.000
## age30 1.867e-01 1.973e+04 0.000 1.000
## age31 5.348e-01 1.817e+04 0.000 1.000
## age32 8.381e-01 1.844e+04 0.000 1.000
## age33 1.058e+00 1.856e+04 0.000 1.000
## age34 2.024e+01 1.683e+04 0.001 0.999
## age35 6.410e-01 1.812e+04 0.000 1.000
## age36 3.676e-01 1.823e+04 0.000 1.000
## age37 2.007e+01 1.683e+04 0.001 0.999
## age38 1.995e+01 1.683e+04 0.001 0.999
## age39 2.006e+01 1.683e+04 0.001 0.999
## age40 4.583e-01 1.863e+04 0.000 1.000
## age41 -2.741e-02 1.850e+04 0.000 1.000
## age42 7.972e-01 1.853e+04 0.000 1.000
## age43 2.038e+01 1.683e+04 0.001 0.999
## age44 2.342e-01 2.129e+04 0.000 1.000
## age45 4.025e-01 1.940e+04 0.000 1.000
## age46 2.027e+01 1.683e+04 0.001 0.999
## age47 1.263e-01 1.879e+04 0.000 1.000
## age48 -5.559e-01 2.104e+04 0.000 1.000
## age49 2.020e+01 1.683e+04 0.001 0.999
## age50 6.873e-01 2.219e+04 0.000 1.000
## age51 5.253e-01 1.952e+04 0.000 1.000
## age52 8.880e-01 2.091e+04 0.000 1.000
## age53 1.986e+01 1.683e+04 0.001 0.999
## age54 5.612e-01 1.993e+04 0.000 1.000
## age55 8.308e-01 2.662e+04 0.000 1.000
## age56 -3.830e-01 2.160e+04 0.000 1.000
## age57 1.427e-01 2.028e+04 0.000 1.000
## age58 3.218e-01 2.229e+04 0.000 1.000
## age59 -1.925e-02 2.116e+04 0.000 1.000
## age60 4.262e-01 2.379e+04 0.000 1.000
## age61 5.076e-01 2.658e+04 0.000 1.000
## age62 -1.264e+00 2.552e+04 0.000 1.000
## age63 -4.438e-01 2.169e+04 0.000 1.000
## age64 1.953e+01 1.683e+04 0.001 0.999
## age67 3.369e-01 3.373e+04 0.000 1.000
## age68 1.210e+00 2.488e+04 0.000 1.000
## age69 -1.603e+00 3.373e+04 0.000 1.000
## age71 -1.685e+00 3.373e+04 0.000 1.000
## age72 -1.025e+00 2.595e+04 0.000 1.000
## age74 -1.103e-01 3.373e+04 0.000 1.000
## age75 7.861e-01 3.373e+04 0.000 1.000
## age76 7.861e-01 3.373e+04 0.000 1.000
## age77 -1.713e-01 3.373e+04 0.000 1.000
## age83 2.759e-01 3.373e+04 0.000 1.000
## age87 -1.542e+00 3.373e+04 0.000 1.000
## ageUnknown 2.087e+01 1.683e+04 0.001 0.999
## armedFirearm -6.822e-01 2.271e+04 0.000 1.000
## armedKnife -1.131e+00 2.271e+04 0.000 1.000
## armedNo -6.212e-01 2.271e+04 0.000 1.000
## armedNon-lethal firearm -1.535e+00 2.271e+04 0.000 1.000
## armedOther -1.989e+01 2.318e+04 -0.001 0.999
## armedUnknown -1.863e+01 2.449e+04 -0.001 0.999
## armedVehicle -7.938e-01 2.271e+04 0.000 1.000
## causeGunshot 1.778e+01 6.322e+03 0.003 0.998
## causeStruck by vehicle 1.966e+01 6.322e+03 0.003 0.998
## causeTaser 1.725e+01 6.322e+03 0.003 0.998
## causeUnknown -1.958e-01 1.478e+04 0.000 1.000
## raceethnicityBlack 4.098e-01 1.382e+00 0.297 0.767
## raceethnicityHispanic/Latino -1.224e+00 1.710e+00 -0.716 0.474
## raceethnicityNative American -1.867e+01 1.348e+04 -0.001 0.999
## raceethnicityUnknown 1.984e+00 1.837e+00 1.080 0.280
## raceethnicityWhite -3.742e-02 1.340e+00 -0.028 0.978
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 177.38 on 466 degrees of freedom
## Residual deviance: 113.47 on 390 degrees of freedom
## AIC: 267.47
##
## Number of Fisher Scoring iterations: 20
##
## Next step: Use 'setx' method
This is a regression model that estimates the correlation between gender and age, whether the deceased was armed, their cause of death, and their race/ethnicity. The table is not the best as it lists each age separately, it makes it difficult to determine a relationship between gender and age.