library(rethinking)
library(tidyverse)
library(knitr)
library(kableExtra)
theme_set(theme_bw())

Is there gender discrimination in graduate admissions? In other words, does Gender influenwce Admission.

Department mediates admission because different departments have different slots/fundings and different popularities. So the admissions_rate is are affected by deparment qualities.

Total effect

data(UCBadmit)
dat <- UCBadmit %>% 
  mutate(gender = if_else(applicant.gender=="female", 1,2)) %>% 
  mutate(department = as.integer(dept)) %>% 
  select(admit, applications, gender, department)
dat %>% 
  kbl() %>% 
  kable_classic(full_width = F, html_font = "Cambria")
admit applications gender department
512 825 2 1
89 108 1 1
353 560 2 2
17 25 1 2
120 325 2 3
202 593 1 3
138 417 2 4
131 375 1 4
53 191 2 5
94 393 1 5
22 373 2 6
24 341 1 6

Total effect of Gender

modelG <- ulam(
  alist(
    admit ~ binomial(applications,p),
    logit(p) <- a[gender],
    a[gender] ~ normal(0,1)
  ),
  data = dat, chains=4, cores=12)
precis(modelG, depth=3)
##            mean         sd       5.5%      94.5%    n_eff    Rhat4
## a[1] -0.8301325 0.04994838 -0.9091422 -0.7483678 1290.577 1.002414
## a[2] -0.2169773 0.03770973 -0.2764316 -0.1577466 1372.687 1.003007
post1 <- extract.samples(modelG)
prob_admit_female = inv_logit(post1$a[,1])
prob_admit_male = inv_logit(post1$a[,2])

diff_prob <- prob_admit_female - prob_admit_male
dens(diff_prob, lwd=4, col=2, xlab="Gender contrast (probability)")

modelGD <- ulam(
  alist(
    admit ~ binomial(applications,p),
    logit(p) <- a[gender, department],
    matrix[gender,department]:a ~ normal(0,1)
  ),
  data = dat, chains=4, cores=12)

Note: if you’re confused about matrix[G,D]:a, its at 53:15 of the video. That gives us a matrix variable a with every combination.

precis(modelGD, depth=3)
##              mean         sd        5.5%      94.5%    n_eff     Rhat4
## a[1,1]  1.4742255 0.23592428  1.10311175  1.8519373 2100.616 0.9982883
## a[1,2]  0.6544687 0.39340643  0.03749995  1.2766999 2588.766 0.9996973
## a[1,3] -0.6602035 0.08728734 -0.80346235 -0.5206728 2118.483 0.9994618
## a[1,4] -0.6171283 0.10368609 -0.78392746 -0.4492815 2249.103 0.9991085
## a[1,5] -1.1444839 0.11544075 -1.33072980 -0.9584639 2273.551 1.0001324
## a[1,6] -2.4923235 0.20385293 -2.82377010 -2.1675226 3038.367 0.9992907
## a[2,1]  0.4894703 0.06971842  0.37821544  0.5997400 2404.143 0.9990085
## a[2,2]  0.5298825 0.08724038  0.39170370  0.6675184 2374.762 1.0026708
## a[2,3] -0.5334707 0.11474720 -0.71588035 -0.3472441 2266.208 1.0000879
## a[2,4] -0.6995687 0.10390152 -0.86433890 -0.5402697 2565.663 0.9986989
## a[2,5] -0.9365873 0.16213350 -1.20039935 -0.6820414 2440.112 0.9989415
## a[2,6] -2.6697049 0.20763792 -3.00528160 -2.3446183 2293.967 0.9985150
w <- xtabs(dat$applications ~ dat$department) / sum(dat$applications)
# i gave up here
# number of applications to simulate
total_apps <- sum(dat$applications)

# number of applications per dept.
df_apps_per_dept <- dat %>% group_by(department) %>%
  summarise(n= sum(applications)) %>% arrange(department)

apps_per_dept <- df_apps_per_dept$n

We will simulate if all applications are from women and then from men.

link function in rethinking package, looks inside the model and pulls the formula to make the posterior predictions.

p_female <- link(modelGD,
                 data=list(
                   department=rep(1:6, times=apps_per_dept),
                   applications=rep(1, total_apps),
                   gender=rep(1, total_apps)
                 ))

p_male <- link(modelGD,
                 data=list(
                   department=rep(1:6, times=apps_per_dept),
                   applications=rep(1, total_apps),
                   gender=rep(2, total_apps)
                 ))

dens(p_female - p_male, lwd=4, col=2)