library(rethinking)
library(tidyverse)
library(knitr)
library(kableExtra)
theme_set(theme_bw())
Is there gender discrimination in graduate admissions? In other
words, does Gender influenwce Admission.
Department mediates admission because different departments have
different slots/fundings and different popularities. So the
admissions_rate is are affected by deparment qualities.
data(UCBadmit)
dat <- UCBadmit %>%
mutate(gender = if_else(applicant.gender=="female", 1,2)) %>%
mutate(department = as.integer(dept)) %>%
select(admit, applications, gender, department)
dat %>%
kbl() %>%
kable_classic(full_width = F, html_font = "Cambria")
| admit | applications | gender | department |
|---|---|---|---|
| 512 | 825 | 2 | 1 |
| 89 | 108 | 1 | 1 |
| 353 | 560 | 2 | 2 |
| 17 | 25 | 1 | 2 |
| 120 | 325 | 2 | 3 |
| 202 | 593 | 1 | 3 |
| 138 | 417 | 2 | 4 |
| 131 | 375 | 1 | 4 |
| 53 | 191 | 2 | 5 |
| 94 | 393 | 1 | 5 |
| 22 | 373 | 2 | 6 |
| 24 | 341 | 1 | 6 |
modelG <- ulam(
alist(
admit ~ binomial(applications,p),
logit(p) <- a[gender],
a[gender] ~ normal(0,1)
),
data = dat, chains=4, cores=12)
precis(modelG, depth=3)
## mean sd 5.5% 94.5% n_eff Rhat4
## a[1] -0.8301325 0.04994838 -0.9091422 -0.7483678 1290.577 1.002414
## a[2] -0.2169773 0.03770973 -0.2764316 -0.1577466 1372.687 1.003007
post1 <- extract.samples(modelG)
prob_admit_female = inv_logit(post1$a[,1])
prob_admit_male = inv_logit(post1$a[,2])
diff_prob <- prob_admit_female - prob_admit_male
dens(diff_prob, lwd=4, col=2, xlab="Gender contrast (probability)")
modelGD <- ulam(
alist(
admit ~ binomial(applications,p),
logit(p) <- a[gender, department],
matrix[gender,department]:a ~ normal(0,1)
),
data = dat, chains=4, cores=12)
Note: if you’re confused about matrix[G,D]:a, its at
53:15 of the video. That gives us a matrix variable a with every
combination.
precis(modelGD, depth=3)
## mean sd 5.5% 94.5% n_eff Rhat4
## a[1,1] 1.4742255 0.23592428 1.10311175 1.8519373 2100.616 0.9982883
## a[1,2] 0.6544687 0.39340643 0.03749995 1.2766999 2588.766 0.9996973
## a[1,3] -0.6602035 0.08728734 -0.80346235 -0.5206728 2118.483 0.9994618
## a[1,4] -0.6171283 0.10368609 -0.78392746 -0.4492815 2249.103 0.9991085
## a[1,5] -1.1444839 0.11544075 -1.33072980 -0.9584639 2273.551 1.0001324
## a[1,6] -2.4923235 0.20385293 -2.82377010 -2.1675226 3038.367 0.9992907
## a[2,1] 0.4894703 0.06971842 0.37821544 0.5997400 2404.143 0.9990085
## a[2,2] 0.5298825 0.08724038 0.39170370 0.6675184 2374.762 1.0026708
## a[2,3] -0.5334707 0.11474720 -0.71588035 -0.3472441 2266.208 1.0000879
## a[2,4] -0.6995687 0.10390152 -0.86433890 -0.5402697 2565.663 0.9986989
## a[2,5] -0.9365873 0.16213350 -1.20039935 -0.6820414 2440.112 0.9989415
## a[2,6] -2.6697049 0.20763792 -3.00528160 -2.3446183 2293.967 0.9985150
w <- xtabs(dat$applications ~ dat$department) / sum(dat$applications)
# i gave up here
# number of applications to simulate
total_apps <- sum(dat$applications)
# number of applications per dept.
df_apps_per_dept <- dat %>% group_by(department) %>%
summarise(n= sum(applications)) %>% arrange(department)
apps_per_dept <- df_apps_per_dept$n
We will simulate if all applications are from women and then from men.
link function in rethinking package, looks inside the
model and pulls the formula to make the posterior predictions.
p_female <- link(modelGD,
data=list(
department=rep(1:6, times=apps_per_dept),
applications=rep(1, total_apps),
gender=rep(1, total_apps)
))
p_male <- link(modelGD,
data=list(
department=rep(1:6, times=apps_per_dept),
applications=rep(1, total_apps),
gender=rep(2, total_apps)
))
dens(p_female - p_male, lwd=4, col=2)