Package loading.
library(tidyverse)
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
library(brms)
## Warning: package 'brms' was built under R version 3.5.2
## Warning: package 'Rcpp' was built under R version 3.5.2
library(wordbankr)
theme_set(langcog::theme_mikabr())
knitr::opts_chunk$set(cache = TRUE, warn = FALSE, message = FALSE)
Data loading
wg_admins <- get_administration_data(language = "English (American)",
form = "WS", original_ids = TRUE)
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
wg_long <- wg_admins %>%
group_by(original_id, mom_ed) %>%
count %>%
filter(n == 2, !is.na(mom_ed))
## Warning: Factor `mom_ed` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `mom_ed` contains implicit NA, consider using
## `forcats::fct_explicit_na`
wg_long_admins <- left_join(wg_long, wg_admins)
wg_items <- get_item_data(language = "English (American)",
form = "WS")
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
## Warning in .local(conn, statement, ...): Decimal MySQL column 6 imported as
## numeric
wg_all <- get_instrument_data(language = "English (American)",
form = "WS",
items = filter(wg_items,
type == "word") %>%
pull(item_id))
wg_long_byitem <- filter(wg_all, data_id %in% wg_long_admins$data_id)
Make main dataframe.
d <- left_join(wg_long_byitem,
select(wg_long_admins,
data_id, original_id, age, mom_ed, sex)) %>%
mutate(produces = as.numeric(value == "produces")) %>%
select(-value) %>%
left_join(select(wg_items, num_item_id, definition, category)) %>%
rename(person = original_id,
item = definition) %>%
select(person, age, sex, mom_ed, category, item, produces)
ms <- d %>%
group_by(person, age, sex) %>%
summarise(produces = sum(produces))
ggplot(ms,
aes(x = age, y = produces, group = person, col = sex)) +
geom_jitter(alpha = .2, width = .2)+
geom_line(alpha = .2) +
geom_smooth(aes(group = sex), span = 2, se = FALSE) +
ggthemes::scale_color_solarized()
Break into two time periods and subsample.
earlier <- d %>%
group_by(person, age) %>%
count() %>%
group_by(person) %>%
mutate(earlier = ifelse(age == age[1], TRUE, FALSE),
twos = ifelse(age[1] == 24, TRUE, FALSE)) %>%
select(-n)
d <- left_join(d, earlier)
d_sub <- filter(d,
category == "food_drink",
twos)
d1_sub <- filter(d_sub, earlier)
d2_sub <- filter(d_sub, !earlier)
Plotting the subsampled data.
ms <- d_sub %>%
group_by(person, age, sex) %>%
summarise(produces = sum(produces))
ggplot(ms,
aes(x = age, y = produces, group = person, col = sex)) +
geom_jitter(alpha = .2, width = .2)+
geom_line(alpha = .2) +
geom_smooth(aes(group = sex), span = 2, se = FALSE) +
ggthemes::scale_color_solarized() +
langcog::theme_mikabr()
First fit 1pl to time 1.
formula_1pl <- produces ~ 1 + (1|person) + (1|item)
prior_1pl <-
prior("normal(0, 3)", class = "sd", group = "person") +
prior("normal(0, 3)", class = "sd", group = "item")
mod_d1_1pl <- brm(formula = formula_1pl,
prior = prior_1pl,
family = brmsfamily(family = "bernoulli", link = "logit"),
data = d1_sub)
##
## SAMPLING FOR MODEL '080fe31e0b6ab0c59347934ef33710e4' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000273 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 2.73 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 4.37235 seconds (Warm-up)
## Chain 1: 4.67327 seconds (Sampling)
## Chain 1: 9.04562 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL '080fe31e0b6ab0c59347934ef33710e4' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000148 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.48 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 4.4231 seconds (Warm-up)
## Chain 2: 4.72401 seconds (Sampling)
## Chain 2: 9.14711 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL '080fe31e0b6ab0c59347934ef33710e4' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.000151 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 1.51 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 4.43297 seconds (Warm-up)
## Chain 3: 4.62197 seconds (Sampling)
## Chain 3: 9.05494 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL '080fe31e0b6ab0c59347934ef33710e4' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 0.000146 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.46 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 4.30439 seconds (Warm-up)
## Chain 4: 4.67911 seconds (Sampling)
## Chain 4: 8.9835 seconds (Total)
## Chain 4:
Basic summary of the model.
summary(mod_d1_1pl)
## Family: bernoulli
## Links: mu = logit
## Formula: produces ~ 1 + (1 | person) + (1 | item)
## Data: d1_sub (Number of observations: 1700)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~item (Number of levels: 68)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.38 0.15 1.10 1.70 1.00 1507 2600
##
## ~person (Number of levels: 25)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.87 0.32 1.35 2.59 1.01 1026 1875
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.77 0.43 -1.58 0.09 1.00 608 933
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Examine parameters.
plot(mod_d1_1pl)
Plot items.
items_1pl <- as_tibble(coef(mod_d1_1pl)$item[, , "Intercept"]) %>%
mutate(item = rownames(coef(mod_d1_1pl)$item[, , "Intercept"]),
item = fct_reorder(item, Estimate))
ggplot(items_1pl,
aes(x = item, y = Estimate)) +
geom_pointrange(aes(ymin = Q2.5, ymax = Q97.5)) +
coord_flip()
Now plot people.
persons_1pl <- as_tibble(ranef(mod_d1_1pl)$person[, , "Intercept"]) %>%
mutate(person = rownames(ranef(mod_d1_1pl)$person[, , "Intercept"])) %>%
left_join(d1_sub %>%
group_by(person, age, sex) %>% count()) %>%
mutate(person = fct_reorder(person, Estimate))
ggplot(persons_1pl,
aes(x = person, y = Estimate, col = sex)) +
geom_pointrange(aes(ymin = Q2.5, ymax = Q97.5)) +
coord_flip()
Now think about modeling sex differences.
Classically, DIF would be written as a fied effect of sex plus a random effect of sex by item.
formula_1pl_dif <- produces ~ sex + (1 | person) + (sex | item)
prior_1pl_dif <-
prior("normal(0, 3)", class = "sd", group = "person") +
prior("normal(0, 3)", class = "sd", group = "item")
mod_d1_1pl_dif <- brm(formula = formula_1pl_dif,
prior = prior_1pl_dif,
family = brmsfamily(family = "bernoulli",
link = "logit"),
data = d1_sub)
##
## SAMPLING FOR MODEL '8bf3d3d3fa426de24ee1bbe596d3b3ba' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000381 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 3.81 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 10.7652 seconds (Warm-up)
## Chain 1: 7.69702 seconds (Sampling)
## Chain 1: 18.4623 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL '8bf3d3d3fa426de24ee1bbe596d3b3ba' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000239 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 2.39 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 10.9499 seconds (Warm-up)
## Chain 2: 7.89602 seconds (Sampling)
## Chain 2: 18.846 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL '8bf3d3d3fa426de24ee1bbe596d3b3ba' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.000245 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 2.45 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 11.284 seconds (Warm-up)
## Chain 3: 7.65485 seconds (Sampling)
## Chain 3: 18.9388 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL '8bf3d3d3fa426de24ee1bbe596d3b3ba' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 0.000251 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 2.51 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 11.3786 seconds (Warm-up)
## Chain 4: 7.76694 seconds (Sampling)
## Chain 4: 19.1455 seconds (Total)
## Chain 4:
plot(mod_d1_1pl_dif, pars = c("b_sexMale", "sd_item__sexMale"))
Plot items and DIF scores. Spoiler, there are no DIF effects on our subsample, food items.
items_1pl_dif <- bind_rows(
as_tibble(coef(mod_d1_1pl_dif)$item[, , "Intercept"]) %>%
mutate(coef = "Intercept"),
as_tibble(ranef(mod_d1_1pl_dif)$item[, , "sexMale"]) %>%
mutate(coef = "sexMale")) %>%
mutate(item = rep(rownames(coef(mod_d1_1pl_dif)$item[, , "Intercept"]),2),
item = fct_reorder(item, Estimate))
ggplot(items_1pl_dif,
aes(x = item, y = Estimate)) +
geom_pointrange(aes(ymin = Q2.5, ymax = Q97.5)) +
coord_flip() +
facet_grid(.~coef, scales = "free_x")
Now plot people.
persons_1pl_dif <- as_tibble(ranef(mod_d1_1pl_dif)$person[, , "Intercept"]) %>%
mutate(person = rownames(ranef(mod_d1_1pl_dif)$person[, , "Intercept"])) %>%
left_join(d1_sub %>%
group_by(person, age, sex) %>% count()) %>%
mutate(person = fct_reorder(person, Estimate))
persons_1pl_dif_coef <- as_tibble(coef(mod_d1_1pl_dif)$person[, , "Intercept"]) %>%
mutate(person = rownames(coef(mod_d1_1pl_dif)$person[, , "Intercept"])) %>%
left_join(d1_sub %>%
group_by(person, age, sex) %>% count()) %>%
mutate(person = fct_reorder(person, Estimate))
ggplot(persons_1pl_dif,
aes(x = person, y = Estimate, col = sex)) +
geom_pointrange(aes(ymin = Q2.5, ymax = Q97.5)) +
coord_flip()
Plot sex-DIF vs. non-sex-DIF estimates for people, showing the differences in the random ability estimates.
compare_dif <- tibble(estimate = persons_1pl$Estimate,
estimate_dif = persons_1pl_dif$Estimate,
estimate_dif_coef = persons_1pl_dif_coef$Estimate,
sex = persons_1pl$sex)
ggplot(data = compare_dif,
aes(x = estimate, y = estimate_dif,
col = sex)) +
geom_point() +
geom_abline(lty = 2)
# p2 <- ggplot(data = compare_dif,
# aes(x = estimate, y = estimate_dif_coef,
# col = sex)) +
# geom_point() +
# geom_abline(lty = 2) +
# theme(legend.pos = "bottom")
#
# cowplot::plot_grid(p1,p2)
You can get:
This second is of interest in reasoning about “ability in context.”
One way of formalizing the affirmative action hypothesis is the idea that, for some demographics, (2) should be a better predictor of future performance than (1).