Our goal here is to model trial-level variation in accuracy for familiar word recognition across all the experiments in Peekbank.
Let’s move forward with ALL THE DATA (tm). Window will be something like 500 – 4000 based on the above analysis (which is incomplete, but which suggested that using all the data was better than not).
df <- d_trial |>
filter(t_norm > 500, t_norm < 4000) |>
group_by(dataset_name, dataset_id, administration_id,
age, stimulus_id, target_label, distractor_label) |>
summarise(accuracy = mean(correct[t_norm > 0], na.rm=TRUE),
prop_data = mean(!is.na(correct[t_norm > 0])),
target = sum(correct[t_norm > 0], na.rm=TRUE),
distractor = sum(!correct[t_norm > 0], na.rm=TRUE),
elogit = log( (target + .5) / (distractor + .5) ))
df <- df[complete.cases(df),]
How much data is there?
df %>%
group_by(dataset_name) %>%
count() %>%
arrange(desc(n)) %>%
knitr::kable()
dataset_name | n |
---|---|
adams_marchman_2018 | 5973 |
fmw_2013 | 1790 |
reflook_socword | 1505 |
attword_processed | 1059 |
xsectional_2007 | 1007 |
newman_genderdistractor | 711 |
mahr_coartic | 621 |
potter_canine | 515 |
perry_cowpig | 504 |
pomper_saffran_2016 | 471 |
frank_tablet_2016 | 461 |
swingley_aslin_2002 | 420 |
garrison_bergelson_2020 | 395 |
bacon_gendercues | 380 |
ronfard_2021 | 296 |
reflook_v4 | 277 |
pomper_yumme | 255 |
pomper_salientme | 248 |
newman_sinewave_2015 | 150 |
potter_remix | 106 |
df %>%
group_by(target_label) %>%
count() %>%
arrange(desc(n)) %>%
DT::datatable()
How are we going to model accuracies? The trouble is that we have a very odd dependent variable.
ggplot(df, aes(x = accuracy)) +
geom_histogram()
ggplot(df, aes(x = log(accuracy))) +
geom_histogram()
What about going back to the raw proportions?
ggplot(df, aes(x = elogit)) +
geom_histogram()
And consider filtering 0/1 observations just to see if we can get a decent distribution.
ggplot(filter(df, accuracy > 0, accuracy < 1),
aes(x = elogit)) +
geom_histogram()
That looks lovely.
df_clean <- filter(df, accuracy > 0, accuracy < 1)
Plot all trials by all participants.
ggplot(df_clean, aes(x = age/12, y = elogit)) +
geom_point(alpha = .01) +
geom_smooth() +
geom_hline(lty = 2, yintercept = 0) +
ggthemes::theme_few()
Try breaking this down by word.
words <- df_clean |>
group_by(target_label) |>
count()
hf_words = words |>
filter(n > 200)
comparison_plot <- ggplot(filter(df_clean, target_label %in% hf_words$target_label),
aes(x = age/12, y = elogit, col = target_label)) +
geom_jitter(alpha = .05, width = .02, height = 0) +
geom_smooth(se=FALSE) +
geom_hline(lty = 2, yintercept = 0)+
ggthemes::theme_few()
comparison_plot
What follows is an iterative model building/comparison exercise. The
goal is really to build up something that is interpretable as a model of
variation in the LWL task. You might think this could be done
programmatically as opposed to with cut-and-paste as I did it here. In
my defense, I spent a lot of time with tidymodels
and
workflows
and found that there were some bugs in the
predict
workflow that were insurmountable for
lme4
. In particular, we couldn’t predict with different
random effect structures, meaning that one critical function that we
were trying to fulfill here was not doable under
tidymodels
. Fail. Hence the copy/paste.
lmm_fit <- lmer(elogit ~ age +
(1 | administration_id),
data = df_clean)
preds <- expand_grid(
age = seq(min(df_clean$age),max(df_clean$age),1),
target_label = hf_words$target_label)
preds <- preds |>
mutate(.pred = predict(lmm_fit,
type = "response",
re.form = ~ 0,
newdata = preds))
ggplot(df_clean,
aes(x = age, y = elogit)) +
geom_point(alpha = .1) +
geom_smooth() +
geom_line(data = preds,
aes(x = age, y = .pred, col = target_label)) +
geom_hline(lty = 2, yintercept = 0) +
xlab("Age (months)") +
ggthemes::theme_few()
Now let’s put in some random effects.
lmm_fit <- lmer(elogit ~ age +
(1 | administration_id) +
(1 | dataset_name) +
(1 | target_label),
data = df_clean)
preds <- expand_grid(
age = seq(min(df_clean$age),max(df_clean$age),1),
target_label = hf_words$target_label)
preds <- preds |>
mutate(.pred = predict(lmm_fit,
type = "response",
re.form = ~ (1|target_label),
newdata = preds))
ggplot(df_clean,
aes(x = age, y = elogit)) +
geom_point(alpha = .1) +
geom_line(data = preds,
aes(x = age, y = .pred, col = target_label)) +
geom_hline(lty = 2, yintercept = 0) +
xlab("Age (months)") +
ggthemes::theme_few() +
geom_smooth()
Random slopes by age.
lmm_fit <- lmer(elogit ~ age +
(1 | administration_id) +
(1 | dataset_name) +
(age | target_label),
data = df_clean)
preds <- expand_grid(
age = seq(min(df_clean$age),max(df_clean$age),1),
target_label = hf_words$target_label)
preds <- preds |>
mutate(.pred = predict(lmm_fit,
type = "response",
re.form = ~ (age|target_label),
newdata = preds))
main_plot <- ggplot(df_clean,
aes(x = age, y = elogit)) +
geom_point(alpha = .1) +
geom_line(data = preds,
aes(x = age, y = .pred, col = target_label)) +
geom_hline(lty = 2, yintercept = 0) +
xlab("Age (months)") +
ggthemes::theme_few()
plot_grid(main_plot + scale_color_discrete(guide = "none"),
comparison_plot + scale_color_discrete(guide = "none"))
Seems like we should try to accommodate the curvature in the age
slopes. One possibility is to consider log(age)
as a
predictor. We will center age to increase convergence and swap to
bobyqa
optimizer.
df_clean$log_age <- log(df_clean$age)
df_clean$log_age_centered <- df_clean$log_age - mean(df_clean$log_age)
lmm_fit <- lmer(elogit ~ log_age_centered +
(1 | administration_id) +
(1 | dataset_name) +
(log_age_centered | target_label),
data = df_clean,
control = lmerControl(optimizer="bobyqa"))
preds <- expand_grid(
log_age_centered = seq(min(df_clean$log_age_centered),
max(df_clean$log_age_centered), .1),
target_label = hf_words$target_label)
preds <- preds |>
mutate(.pred = predict(lmm_fit,
type = "response",
re.form = ~ (log_age_centered | target_label),
newdata = preds),
age = exp(log_age_centered + mean(df_clean$log_age)))
main_plot <- ggplot(df_clean,
aes(x = age, y = elogit)) +
geom_point(alpha = .1) +
geom_line(data = preds,
aes(x = age, y = .pred, col = target_label)) +
geom_hline(lty = 2, yintercept = 0) +
xlab("Age (months)") +
ggthemes::theme_few()
plot_grid(main_plot + scale_color_discrete(guide = "none"),
comparison_plot + scale_color_discrete(guide = "none"))
This ends up looking quite good in terms of the shape of the curves and their distribution. Let’s look at the random effects.
tibble(ranef(lmm_fit)$target_label) |>
mutate(target_label = rownames(ranef(lmm_fit)$target_label)) |>
left_join(words) |>
rename(intercept = `(Intercept)`) |>
select(target_label, intercept, log_age_centered, n) |>
arrange(desc(n)) |>
filter(n>100) |>
knitr::kable(digits = 2)
target_label | intercept | log_age_centered | n |
---|---|---|---|
dog | -0.03 | -0.04 | 1798 |
book | 0.20 | 0.11 | 1470 |
ball | -0.09 | -0.46 | 1245 |
car | 0.23 | 0.36 | 1129 |
cat | -0.05 | -0.11 | 1075 |
shoe | -0.03 | 0.07 | 1063 |
bird | -0.15 | 0.16 | 990 |
baby | 0.25 | 0.12 | 921 |
banana | -0.16 | -0.06 | 400 |
apple | -0.23 | 0.16 | 384 |
frog | 0.34 | -0.06 | 369 |
cookie | 0.28 | 0.05 | 230 |
sock | -0.25 | -0.22 | 177 |
block | 0.18 | -0.06 | 149 |
cup | -0.13 | -0.17 | 149 |
horse | 0.06 | -0.12 | 135 |
duck | 0.11 | -0.03 | 132 |
keys | 0.13 | 0.19 | 113 |
I’m worried because these don’t look at all like they reflect
acquisition ordering. For example, ball
has a very low
slope and intercept. Let’s take a look at the top ten of these in the
empirical data.
vhf_words = words |>
filter(n > 800)
ggplot(filter(df_clean, target_label %in% vhf_words$target_label),
aes(x = age, y = elogit)) +
geom_jitter(width = 1, height = 0, alpha = .01) +
facet_wrap(~target_label) +
geom_smooth(se=TRUE) +
scale_color_solarized() +
geom_hline(lty = 2, yintercept = 0)+
ggthemes::theme_few()
Let’s zoom in on ball
.
df_clean |>
ungroup() |>
filter(target_label == "ball") |>
group_by(target_label, distractor_label, dataset_name) |>
summarise(age = mean(age)/12) |>
arrange(age) |>
knitr::kable(digits = 1)
target_label | distractor_label | dataset_name | age |
---|---|---|---|
ball | spoon | garrison_bergelson_2020 | 1.1 |
ball | block | garrison_bergelson_2020 | 1.2 |
ball | flower | garrison_bergelson_2020 | 1.2 |
ball | vacuum | garrison_bergelson_2020 | 1.2 |
ball | car | garrison_bergelson_2020 | 1.2 |
ball | phone | garrison_bergelson_2020 | 1.2 |
ball | sock | garrison_bergelson_2020 | 1.2 |
ball | apple | swingley_aslin_2002 | 1.3 |
ball | hat | garrison_bergelson_2020 | 1.3 |
ball | highchair | garrison_bergelson_2020 | 1.3 |
ball | shoe | garrison_bergelson_2020 | 1.4 |
ball | cup | garrison_bergelson_2020 | 1.4 |
ball | shoe | adams_marchman_2018 | 1.4 |
ball | book | ronfard_2021 | 1.7 |
ball | shoe | fmw_2013 | 1.7 |
ball | duck | mahr_coartic | 1.7 |
ball | truck | newman_genderdistractor | 1.9 |
ball | shoe | xsectional_2007 | 2.0 |
ball | cookie | xsectional_2007 | 2.0 |
ball | cookie | potter_canine | 2.0 |
ball | car | newman_sinewave_2015 | 2.2 |
We see garrison_bergelson_2020
has lots and lots of
different targets. Across datasets, trends definitely go up, but there
is confounding across datasets.
ggplot(filter(df_clean, target_label == "ball"),
aes(x = age, y = elogit, col = dataset_name)) +
geom_jitter(width = 1, height = 0, alpha = .1) +
geom_smooth(method = "lm", se=FALSE) +
geom_hline(lty = 2, yintercept = 0)+
ggthemes::theme_few()
First, we should probably model within-dataset age effects. Second, we should probably model the distractors.
Our model of distractors right now is purely linear. Some distractors
are harder – such that if having car
as the distractor
makes you less likely to look at the target, then it’s going to have a
negative coefficient. Similarly, maybe ball
is a boring
distractor and so you’re more likely to look. We can’t fit in age-slopes
though.
lmm_fit <- lmer(elogit ~ log_age_centered +
(1 | administration_id) +
(log_age_centered | dataset_name) +
(log_age_centered | target_label) +
(1 | distractor_label),
data = df_clean,
control = lmerControl(optimizer="bobyqa"))
preds <- expand_grid(
log_age_centered = seq(min(df_clean$log_age_centered),
max(df_clean$log_age_centered), .1),
target_label = hf_words$target_label)
preds <- preds |>
mutate(.pred = predict(lmm_fit,
type = "response",
re.form = ~ (log_age_centered | target_label),
newdata = preds),
age = exp(log_age_centered + mean(df_clean$log_age)))
main_plot <- ggplot(df_clean,
aes(x = age, y = elogit)) +
geom_point(alpha = .1) +
geom_line(data = preds,
aes(x = age, y = .pred, col = target_label)) +
geom_hline(lty = 2, yintercept = 0) +
xlab("Age (months)") +
ggthemes::theme_few()
plot_grid(main_plot + scale_color_discrete(guide = "none"),
comparison_plot + scale_color_discrete(guide = "none"))
Let’s look at the random effects one more time and see if this helps.
target_ranef <- tibble(ranef(lmm_fit)$target_label) |>
mutate(target_label = rownames(ranef(lmm_fit)$target_label)) |>
left_join(words) |>
rename(intercept = `(Intercept)`) |>
select(target_label, intercept, log_age_centered, n)
target_ranef |>
arrange(desc(n)) |>
filter(n>100) |>
knitr::kable(digits = 2)
target_label | intercept | log_age_centered | n |
---|---|---|---|
dog | -0.05 | -0.28 | 1798 |
book | 0.28 | 0.12 | 1470 |
ball | -0.19 | -0.35 | 1245 |
car | 0.19 | 0.39 | 1129 |
cat | 0.03 | -0.08 | 1075 |
shoe | -0.03 | 0.02 | 1063 |
bird | -0.06 | 0.12 | 990 |
baby | 0.48 | 0.08 | 921 |
banana | -0.21 | -0.07 | 400 |
apple | -0.21 | 0.01 | 384 |
frog | 0.29 | -0.21 | 369 |
cookie | 0.22 | 0.14 | 230 |
sock | -0.26 | -0.18 | 177 |
block | 0.16 | -0.03 | 149 |
cup | -0.13 | -0.15 | 149 |
horse | 0.06 | -0.03 | 135 |
duck | 0.07 | -0.03 | 132 |
keys | 0.13 | 0.21 | 113 |
It looks like the coefficients for ball
are regularized
more toward zero though probably not completely so. That makes me think
that we are probably indexing a bunch of salience effects as well as
true word recognition.
Just for kicks, let’s look at the distractors as well.
distractors <- df_clean |>
group_by(distractor_label) |>
count()
distractor_ranef <- tibble(ranef(lmm_fit)$distractor_label) |>
mutate(distractor_label = rownames(ranef(lmm_fit)$distractor_label)) |>
left_join(distractors) |>
rename(intercept = `(Intercept)`) |>
select(distractor_label, intercept, n)
distractor_ranef |>
arrange(desc(n)) |>
filter(n>100) |>
knitr::kable(digits = 2)
distractor_label | intercept | n |
---|---|---|
dog | -0.21 | 1714 |
book | 0.12 | 1495 |
ball | 0.04 | 1275 |
car | 0.03 | 1225 |
cat | -0.05 | 1079 |
shoe | 0.22 | 1046 |
baby | -0.03 | 1001 |
bird | -0.02 | 995 |
apple | 0.08 | 390 |
frog | -0.04 | 386 |
banana | 0.04 | 348 |
cookie | -0.13 | 215 |
sock | -0.03 | 152 |
block | -0.05 | 151 |
duck | -0.02 | 144 |
cup | 0.02 | 142 |
keys | -0.01 | 112 |
This looks like animates are negative (harder) and inanimates are positive (easier) for the most part.
Let’s add a fixed effect for animacy of target and distractor to try and soak up these preference effects systematically.
sort(unique(c(unique(df_clean$target_label),
unique(df_clean$distractor_label))))
## [1] "animal" "apple" "baby" "ball" "balloon"
## [6] "banana" "bathtub" "bear" "bed" "belt"
## [11] "bib" "bike" "bird" "blanket" "block"
## [16] "blueberries" "boat" "book" "boot" "bottle"
## [21] "bowl" "box" "boy" "broom" "brush"
## [26] "bucket" "bunny" "bus" "cake" "can"
## [31] "car" "carrot" "cat" "chair" "chalk"
## [36] "chicken" "clock" "coat" "comb" "cookie"
## [41] "cow" "crib" "cup" "diaper" "dog"
## [46] "donut" "door" "dress" "drink" "drum"
## [51] "duck" "elephant" "fan" "fish" "flag"
## [56] "flower" "food" "foot" "frog" "giraffe"
## [61] "glasses" "glove" "grapes" "guitar" "hammer"
## [66] "hat" "highchair" "horse" "hose" "juice"
## [71] "kangaroo" "keys" "knife" "ladder" "lamp"
## [76] "lion" "milk" "monkey" "mouth" "muffin"
## [81] "necklace" "opal" "owl" "pacifier" "pajamas"
## [86] "peas" "phone" "piano" "pig" "plate"
## [91] "pudding" "puppy" "purse" "puzzle" "remote"
## [96] "sandbox" "scissors" "shawl" "sheep" "shirt"
## [101] "shoe" "shorts" "shovel" "sippy" "slide"
## [106] "slipper" "sock" "spoon" "strawberry" "stroller"
## [111] "sunglasses" "table" "tablet" "tape" "teddy"
## [116] "tiger" "toothbrush" "toy" "train" "truck"
## [121] "vacuum" "wagon" "water" "waterbottle" "whale"
## [126] "zebra" "zipper"
animates <- c("anima","baby","dog","bear","bird","boy",
"bunny","cat","chicken","cow","dog","duck","elephant",
"fish","frog","giraffe","horse","kangaroo","lion","monkey",
"owl","pig","puppy","sheep","teddy","tiger","whale","zebra")
df_clean$animate_target <- df_clean$target_label %in% animates
df_clean$animate_distractor <- df_clean$distractor_label %in% animates
Now let’s add animacy of targets and distractor to the model as well as interactions with age.
lmm_fit <- lmer(elogit ~ log_age_centered * animate_target +
log_age_centered * animate_distractor +
(1 | administration_id) +
(log_age_centered | dataset_name) +
(log_age_centered | target_label) +
(1 | distractor_label),
data = df_clean,
control = lmerControl(optimizer="bobyqa"))
preds <- expand_grid(
animate_target = c(TRUE, FALSE),
animate_distractor = c(TRUE, FALSE),
log_age_centered = seq(min(df_clean$log_age_centered),
max(df_clean$log_age_centered), .1),
target_label = hf_words$target_label)
preds <- preds |>
mutate(.pred = predict(lmm_fit,
type = "response",
re.form = ~ (log_age_centered | target_label),
newdata = preds),
age = exp(log_age_centered + mean(df_clean$log_age)))
main_plot <- ggplot(df_clean,
aes(x = age, y = elogit)) +
geom_point(alpha = .1) +
geom_line(data = filter(preds, !animate_target, !animate_distractor),
aes(x = age, y = .pred, col = target_label)) +
geom_hline(lty = 2, yintercept = 0) +
xlab("Age (months)") +
ggthemes::theme_few()
plot_grid(main_plot + scale_color_discrete(guide = "none"),
comparison_plot + scale_color_discrete(guide = "none"))
Now when we make predictions, we need to make the predictions with or
without these animacy effects. It’s kind of funny to look at a “dog”
prediction with animate_target=FALSE
but that’s really like
saying “what’s the predicted curve for dog knowledge independent of
perceptual biases.” Let’s look at the model fits first, and then the
random effects.
knitr::kable(summary(lmm_fit)[[10]], digits = 2)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 0.66 | 0.07 | 9.65 |
log_age_centered | 1.05 | 0.16 | 6.73 |
animate_targetTRUE | 0.20 | 0.09 | 2.27 |
animate_distractorTRUE | -0.21 | 0.06 | -3.58 |
log_age_centered:animate_targetTRUE | -0.25 | 0.12 | -2.06 |
log_age_centered:animate_distractorTRUE | 0.24 | 0.10 | 2.48 |
We can see that there is a pretty sizable effect for each, such that:
So in sum, there are big animate biases, especially for younger kids. That seems really important.
Tried fitting an interaction (animate target X animate distractor) to see if they canceled out but the effect was negligible.
Let’s see how the word-level random effects look now. Really they are quite different!
target_ranef <- tibble(ranef(lmm_fit)$target_label) |>
mutate(target_label = rownames(ranef(lmm_fit)$target_label)) |>
left_join(words) |>
rename(intercept = `(Intercept)`) |>
select(target_label, intercept, log_age_centered, n)
target_ranef |>
arrange(desc(intercept)) |>
filter(n>100) |>
knitr::kable(digits = 2)
target_label | intercept | log_age_centered | n |
---|---|---|---|
baby | 0.40 | 0.16 | 921 |
book | 0.33 | 0.05 | 1470 |
car | 0.26 | 0.28 | 1129 |
cookie | 0.25 | 0.12 | 230 |
frog | 0.17 | -0.01 | 369 |
block | 0.15 | 0.03 | 149 |
keys | 0.13 | 0.15 | 113 |
shoe | 0.00 | 0.03 | 1063 |
horse | -0.01 | 0.00 | 135 |
duck | -0.02 | -0.03 | 132 |
cat | -0.04 | -0.03 | 1075 |
dog | -0.10 | -0.10 | 1798 |
bird | -0.11 | 0.07 | 990 |
cup | -0.14 | -0.12 | 149 |
apple | -0.14 | -0.07 | 384 |
ball | -0.14 | -0.25 | 1245 |
banana | -0.16 | -0.07 | 400 |
sock | -0.26 | -0.17 | 177 |
Let’s save the resulting model and carry it forward.
save(file = here("data","df_clean.Rds"), df_clean)
save(file = here("data","lmm_fit.Rds"), lmm_fit)