library(knitr)
library(peekbankr)
library(tidyverse)
library(lme4)
library(lmerTest)
library(tictoc)
library(langcog)
library(here)
#library(tidymodels)
library(wordbankr)
#devtools::install_github("tidymodels/multilevelmod")
#library(multilevelmod)
# Output directory for figures, resolved relative to the project root.
figure_path <- here("figures")
# Restore the saved workspace object(s); the code below relies on this file
# providing `aoi_data_joined`. The path is anchored with here() like every
# other data path in this file, so it no longer depends on the working
# directory at knit time.
# NOTE(review): the file carries an .Rds extension but is read with load(), so
# it was presumably written with save() rather than saveRDS() -- confirm.
load(file = here("data", "aoi_data_joined.Rds"))
knitr::opts_chunk$set(cache = TRUE, warning = FALSE, message = FALSE)
dataset_name_mapping <- read_csv(here("data", "dataset_name_mapping.csv"))
# Pull the stimulus and subject tables from the peekbank database.
con <- connect_to_peekbank()
stimuli <- get_stimuli(connection = con) %>% collect()
subjects <- get_subjects(connection = con) %>% collect()
# Attach subject-level columns to the looking data. No `by` is supplied, so the
# join uses every column name the two tables share.
aoi_data_joined <- aoi_data_joined %>%
  left_join(subjects)
The goal is to explore relationships between item-level properties (specifically, child-directed word frequency and age of acquisition) and accurate looking in the looking while listening paradigm across age.
Compute trial-level accuracy (proportion looking to the target) in a fixed critical window (300-2000ms).
We are focusing only on
familiar words
ages between 12-60 months
English datasets (since this is what we have norms for)
# Bounds of the critical analysis window, compared against t_norm below.
t_min <- 300
t_max <- 2000

# One row per administration x trial: proportion of target looking within the
# critical window, plus the share of samples coded as missing.
by_trial_means <- aoi_data_joined %>%
  # (disabled) drop a single dataset:
  # filter(dataset_name != "pomper_saffran_2016") %>%
  filter(
    # English datasets only
    native_language == "eng",
    # age range: (12, 60]
    age > 12, age <= 60,
    # familiar target items only
    stimulus_novelty == "familiar",
    # samples strictly inside the analysis window
    t_norm > t_min, t_norm < t_max
  ) %>%
  mutate(age_binned = cut(age, breaks = seq(12, 60, by = 12))) %>%
  rename(target_label = english_stimulus_label) %>%
  group_by(
    administration_id, trial_id, target_label, distractor_id,
    age, age_binned
  ) %>%
  summarise(
    # target samples over target + distractor samples (NA samples never count)
    prop_looking = sum(aoi == "target", na.rm = TRUE) /
      sum(aoi %in% c("target", "distractor")),
    prop_missing = mean(aoi == "missing", na.rm = TRUE)
  ) %>%
  # bring in the stimulus record of the distractor to obtain its label
  left_join(stimuli, by = c("distractor_id" = "stimulus_id")) %>%
  rename(distractor_label = english_stimulus_label)
Filter the data to trials with sufficient looking data: a trial is kept only if fewer than 1/3 of its samples in the critical window are coded as missing (i.e., usable looking data on more than 2/3 of the window).
# Modelling data: keep trials with less than 1/3 missing samples and add age
# centered on the mean age of the retained trials.
acc_mod_data <- ungroup(by_trial_means) %>%
  filter(prop_missing < 1 / 3) %>%
  mutate(age_centered = age - mean(age, na.rm = TRUE))
# Number of retained trials per target label.
target_label_counts <- acc_mod_data %>%
  ungroup() %>%
  count(target_label, name = "trial_count")

# Attach the per-label trial count to every trial (used later to restrict
# models to frequently tested items).
acc_mod_data <- left_join(acc_mod_data, target_label_counts, by = "target_label")
TO DO: re-extract frequency information using childes-db. Source for the current frequency values: https://github.com/mikabr/aoa-prediction
# CHILDES word counts (columns used here: word, word_count).
freq <- read_csv(here("data/childes_english.csv"))

# Add word counts for the target and the distractor label, then derive log
# frequencies, their mean-centered versions and a Luce-style ratio.
acc_mod_freq <- acc_mod_data %>%
  left_join(
    freq %>% select(target_label = word, target_word_count = word_count),
    by = "target_label"
  ) %>%
  left_join(
    freq %>% select(distractor_label = word, distractor_word_count = word_count),
    by = "distractor_label"
  ) %>%
  ungroup() %>%
  mutate(
    target_log_freq = log(target_word_count),
    distractor_log_freq = log(distractor_word_count),
    target_log_freq_centered =
      target_log_freq - mean(target_log_freq, na.rm = TRUE),
    distractor_log_freq_centered =
      distractor_log_freq - mean(distractor_log_freq, na.rm = TRUE),
    luce_log_freq = target_log_freq / (target_log_freq + distractor_log_freq)
  )
# Mean proportion target looking (and number of trials, N) for each target
# label within each age bin; the frequency columns are carried along as
# grouping keys.
by_target_item_means <- ungroup(acc_mod_freq) %>%
  group_by(target_label, target_word_count, target_log_freq, age_binned) %>%
  summarise(
    N = n(),
    mean_prop_looking = mean(prop_looking, na.rm = TRUE)
  )
Run this code chunk to collect AOA information from the English (American) WS form on Wordbank. After identifying target labels that cannot be found on Wordbank, a .csv is exported to resolve inconsistencies by hand (e.g. “chicken” == “chicken (animal)” in Wordbank).
# Target labels that need an AOA estimate.
items_for_aoa <- unique(acc_mod_data$target_label)

# Item table for the English (American) WS form on Wordbank.
wordbank_items_eng_ws <- get_item_data(language = "English (American)", form = "WS")

# Target labels with no identically named Wordbank definition: show them...
setdiff(items_for_aoa, wordbank_items_eng_ws$definition)
# ...and export them so they can be aligned by hand.
write_csv(
  data.frame(
    target_label = setdiff(items_for_aoa, wordbank_items_eng_ws$definition)
  ),
  here("data", "items_dropped_aoa.csv")
)

# Labels that match a Wordbank definition exactly map onto themselves.
stimulus_label_wordbank_intersect <- data.frame(
  target_label = intersect(items_for_aoa, wordbank_items_eng_ws$definition)
) %>%
  mutate(definition = target_label)

# Hand-aligned label -> Wordbank definition mapping for the remaining labels.
stimulus_label_wordbank_mapping <- read_csv(
  here("data", "stimulus_label_wordbank_mapping.csv")
)
stimulus_label_wordbank <- bind_rows(
  stimulus_label_wordbank_intersect,
  stimulus_label_wordbank_mapping
)

# Wordbank definitions to request (labels without a definition are skipped)
# and their item ids.
item_names_for_wordbank <- stimulus_label_wordbank %>%
  filter(!is.na(definition)) %>%
  pull(definition)
items_for_wordbank <- wordbank_items_eng_ws %>%
  filter(definition %in% item_names_for_wordbank) %>%
  pull(item_id)

# Instrument data for those items from Wordbank.
eng_ws_data <- get_instrument_data(
  language = "English (American)",
  form = "WS",
  items = items_for_wordbank,
  administrations = TRUE,
  iteminfo = TRUE
)

# Fit AOA curves on the "produces" measure, then map the Wordbank definitions
# back to stimulus labels (the join uses the columns the two tables share).
aoas_ws_produces <- eng_ws_data %>%
  fit_aoa(measure = "produces", age_min = 0) %>%
  ungroup() %>%
  select(aoa, item_id, definition) %>%
  left_join(stimulus_label_wordbank) %>%
  select(target_label, definition, aoa)

write_csv(aoas_ws_produces, here("data", "aoas_wordbank_ws_produces.csv"))
Join English AOAs into the trial-level accuracy data, both for target labels and for distractor labels.
# Item-level AOA estimates written by the Wordbank step: target_label (the
# stimulus label), definition (the Wordbank item name) and aoa.
# aoas <- read_csv(here("data", "bglm_aoas_english.csv"))  # earlier AOA source
aoas <- read_csv(here("data", "aoas_wordbank_ws_produces.csv"))

# Trial-level data with target and distractor AOA plus derived predictors.
acc_mod_freq_aoa <- acc_mod_freq %>%
  # target AOA, looked up by stimulus label
  left_join(aoas, by = "target_label") %>%
  rename(target_aoa = aoa) %>%
  # Distractor AOA is looked up by stimulus label as well. Matching distractor
  # labels against the Wordbank `definition` (as was done before) can never
  # find labels that were aligned by hand (e.g. "chicken" vs
  # "chicken (animal)"), so those trials were silently removed by the NA
  # filter below.
  left_join(
    aoas %>%
      transmute(distractor_label = target_label, distractor_aoa = aoa),
    by = "distractor_label"
  ) %>%
  # keep only trials where both items have an AOA estimate
  filter(!is.na(target_aoa), !is.na(distractor_aoa)) %>%
  ungroup() %>%
  mutate(
    target_aoa_centered = target_aoa - mean(target_aoa, na.rm = TRUE),
    distractor_aoa_centered = distractor_aoa - mean(distractor_aoa, na.rm = TRUE)
  ) %>%
  mutate(
    inverse_target = 1 / target_aoa,
    inverse_distractor = 1 / distractor_aoa,
    luce_untransformed = target_aoa / (target_aoa + distractor_aoa),
    # Luce choice ratio on inverse AOA
    luce = inverse_target / (inverse_target + inverse_distractor),
    luce_log = log(inverse_target) /
      (log(inverse_target) + log(inverse_distractor))
  )
First, we’re going to explore whether the log word frequency of the relevant items (from CHILDES) predicts proportion target looking. We’ll look at just the target word frequency first, and then more complex models that also include distractor label frequency. The crux is that although it appears that there is some signal here (see plots below and model without random item effects), that signal doesn’t generalize across items (i.e., once random effects for items are added into the model). This is probably the result of a few different things coming together:
We have vastly different numbers of observations for different items
There are idiosyncrasies to these datasets, and one or two datasets with a distinct set of items can have a vast amount of influence at this point.
The items are not randomly distributed across e.g., age, but are specifically selected for the age range tested, so there is significant range restriction likely going on.
# overall: trial-level target looking against target log frequency
ggplot(acc_mod_freq, aes(x = target_log_freq, y = prop_looking)) +
  geom_point(alpha = 0.01) +
  geom_smooth(method = "lm")
# same relationship, one panel per age bin
ggplot(acc_mod_freq, aes(x = target_log_freq, y = prop_looking)) +
  geom_point(alpha = 0.01) +
  geom_smooth(method = "lm") +
  facet_wrap(~age_binned)
#### By Dataset
# same relationship, one panel per dataset
ggplot(acc_mod_freq, aes(x = target_log_freq, y = prop_looking)) +
  geom_point(alpha = 0.01) +
  geom_smooth(method = "lm") +
  facet_wrap(~dataset_name)
This plot shows relationships between frequency and proportion target looking when averaging across trials for each specific item.
# Minimum number of trials (N) an item needs within an age bin to be plotted.
n_cutoff <- 20

# Item-level means against target log frequency, one panel per age bin; point
# size reflects N and colour identifies the item (legend suppressed).
by_target_item_means %>%
  filter(N > n_cutoff) %>%
  ggplot(aes(
    x = target_log_freq,
    y = mean_prop_looking,
    size = N,
    color = target_label
  )) +
  geom_point() +
  geom_smooth(method = "lm", color = "black") +
  theme(legend.position = "none") +
  facet_wrap(~age_binned)
Predict proportion looking from target frequency, controlling for age. The frequency effect does not appear to generalize across items (i.e., it disappears once by-item random effects are included).
# Target frequency + age; random intercepts for administration and dataset
# only (no by-item term).
m1 <- lmer(
  prop_looking ~ target_log_freq_centered + age_centered +
    (1 | administration_id) + (1 | dataset_name),
  data = acc_mod_freq
)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ target_log_freq_centered + age_centered + (1 |
## administration_id) + (1 | dataset_name)
## Data: acc_mod_freq
##
## REML criterion at convergence: 7615.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8705 -0.5962 0.1428 0.7675 2.3373
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.005157 0.07181
## dataset_name (Intercept) 0.003348 0.05787
## Residual 0.082544 0.28730
## Number of obs: 19809, groups: administration_id, 1296; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.688e-01 1.875e-02 8.976e+00 35.66 5.57e-11 ***
## target_log_freq_centered 1.092e-02 1.733e-03 1.912e+04 6.30 3.04e-10 ***
## age_centered 4.968e-03 3.868e-04 2.049e+03 12.85 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trg___
## trgt_lg_fr_ 0.005
## age_centerd -0.030 -0.004
# Same fixed effects as m1, adding a random intercept for the target item.
m1A <- lmer(
  prop_looking ~ target_log_freq_centered + age_centered +
    (1 | administration_id) + (1 | dataset_name) + (1 | target_label),
  data = acc_mod_freq
)
summary(m1A)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ target_log_freq_centered + age_centered + (1 |
## administration_id) + (1 | dataset_name) + (1 | target_label)
## Data: acc_mod_freq
##
## REML criterion at convergence: 7229.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9770 -0.5863 0.1402 0.7535 2.3175
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.005275 0.07263
## target_label (Intercept) 0.005242 0.07240
## dataset_name (Intercept) 0.005284 0.07269
## Residual 0.080037 0.28291
## Number of obs: 19809, groups:
## administration_id, 1296; target_label, 131; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.597e-01 2.498e-02 1.117e+01 26.412 2.03e-11 ***
## target_log_freq_centered -7.480e-04 5.988e-03 1.179e+02 -0.125 0.901
## age_centered 5.014e-03 3.871e-04 2.604e+03 12.952 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trg___
## trgt_lg_fr_ 0.128
## age_centerd -0.023 0.000
I also considered restricting the data to just items where there is some minimum number of observations (in this case, 20), since I thought items with only small numbers of observations might be introducing a lot of noise. This does not appear to make a big difference to the models, however.
# m1A refit on trials whose target label has more than 20 trials overall.
m1A_red <- lmer(
  prop_looking ~ target_log_freq_centered + age_centered +
    (1 | administration_id) + (1 | dataset_name) + (1 | target_label),
  data = filter(acc_mod_freq, trial_count > 20)
)
summary(m1A_red)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ target_log_freq_centered + age_centered + (1 |
## administration_id) + (1 | dataset_name) + (1 | target_label)
## Data: filter(acc_mod_freq, trial_count > 20)
##
## REML criterion at convergence: 7097.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9818 -0.5864 0.1417 0.7533 2.3219
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.005292 0.07275
## target_label (Intercept) 0.004974 0.07052
## dataset_name (Intercept) 0.005163 0.07186
## Residual 0.079865 0.28260
## Number of obs: 19583, groups:
## administration_id, 1296; target_label, 91; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.626e-01 2.485e-02 1.142e+01 26.66 1.21e-11 ***
## target_log_freq_centered 9.179e-04 6.573e-03 8.866e+01 0.14 0.889
## age_centered 5.010e-03 3.870e-04 2.566e+03 12.95 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trg___
## trgt_lg_fr_ 0.112
## age_centerd -0.024 0.002
Fit more complex models including both target word and distractor word frequency. The issues remain similar.
# Target and distractor frequency + age; no by-item random effects.
m2 <- lmer(
  prop_looking ~ target_log_freq_centered + distractor_log_freq_centered +
    age_centered +
    (1 | administration_id) + (1 | dataset_name),
  data = acc_mod_freq
)
summary(m2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## prop_looking ~ target_log_freq_centered + distractor_log_freq_centered +
## age_centered + (1 | administration_id) + (1 | dataset_name)
## Data: acc_mod_freq
##
## REML criterion at convergence: 7238.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8750 -0.5937 0.1394 0.7779 2.1728
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.005239 0.07238
## dataset_name (Intercept) 0.001948 0.04414
## Residual 0.082548 0.28731
## Number of obs: 18791, groups: administration_id, 1294; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.705e-01 1.467e-02 8.351e+00 45.713 2.55e-11
## target_log_freq_centered 1.039e-02 1.754e-03 1.797e+04 5.923 3.21e-09
## distractor_log_freq_centered 1.359e-03 1.644e-03 1.803e+04 0.826 0.409
## age_centered 5.354e-03 3.956e-04 1.130e+03 13.533 < 2e-16
##
## (Intercept) ***
## target_log_freq_centered ***
## distractor_log_freq_centered
## age_centered ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trg___ dst___
## trgt_lg_fr_ 0.008
## dstrctr_l__ 0.008 -0.044
## age_centerd -0.037 -0.001 -0.001
# m2 plus random intercepts for the target and the distractor item.
m2A <- lmer(
  prop_looking ~ target_log_freq_centered + distractor_log_freq_centered +
    age_centered +
    (1 | administration_id) + (1 | dataset_name) +
    (1 | target_label) + (1 | distractor_label),
  data = acc_mod_freq
)
summary(m2A)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## prop_looking ~ target_log_freq_centered + distractor_log_freq_centered +
## age_centered + (1 | administration_id) + (1 | dataset_name) +
## (1 | target_label) + (1 | distractor_label)
## Data: acc_mod_freq
##
## REML criterion at convergence: 6815.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9455 -0.5832 0.1331 0.7561 2.3376
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.005383 0.07337
## distractor_label (Intercept) 0.003738 0.06114
## target_label (Intercept) 0.004152 0.06443
## dataset_name (Intercept) 0.002502 0.05002
## Residual 0.079522 0.28200
## Number of obs: 18791, groups:
## administration_id, 1294; distractor_label, 131; target_label, 128; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.623e-01 2.032e-02 1.430e+01 32.593 7.91e-15
## target_log_freq_centered 3.135e-03 6.586e-03 1.043e+02 0.476 0.635
## distractor_log_freq_centered 4.670e-03 5.651e-03 9.377e+01 0.826 0.411
## age_centered 5.397e-03 3.963e-04 1.588e+03 13.618 < 2e-16
##
## (Intercept) ***
## target_log_freq_centered
## distractor_log_freq_centered
## age_centered ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trg___ dst___
## trgt_lg_fr_ 0.147
## dstrctr_l__ 0.128 -0.049
## age_centerd -0.029 0.003 0.002
Restrict to items with >20 observations
# m2A refit on trials whose target label has more than 20 trials overall.
m2A_red <- lmer(
  prop_looking ~ target_log_freq_centered + distractor_log_freq_centered +
    age_centered +
    (1 | administration_id) + (1 | dataset_name) +
    (1 | target_label) + (1 | distractor_label),
  data = filter(acc_mod_freq, trial_count > 20)
)
summary(m2A_red)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## prop_looking ~ target_log_freq_centered + distractor_log_freq_centered +
## age_centered + (1 | administration_id) + (1 | dataset_name) +
## (1 | target_label) + (1 | distractor_label)
## Data: filter(acc_mod_freq, trial_count > 20)
##
## REML criterion at convergence: 6686.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9352 -0.5821 0.1348 0.7553 2.3422
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.005406 0.07352
## distractor_label (Intercept) 0.003814 0.06176
## target_label (Intercept) 0.003583 0.05986
## dataset_name (Intercept) 0.002407 0.04906
## Residual 0.079364 0.28172
## Number of obs: 18567, groups:
## administration_id, 1294; distractor_label, 121; target_label, 88; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.660e-01 2.009e-02 1.489e+01 33.149 2.29e-15
## target_log_freq_centered 7.751e-03 7.410e-03 7.572e+01 1.046 0.299
## distractor_log_freq_centered 4.942e-03 5.790e-03 8.675e+01 0.854 0.396
## age_centered 5.392e-03 3.960e-04 1.579e+03 13.616 < 2e-16
##
## (Intercept) ***
## target_log_freq_centered
## distractor_log_freq_centered
## age_centered ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trg___ dst___
## trgt_lg_fr_ 0.115
## dstrctr_l__ 0.135 -0.080
## age_centerd -0.032 0.010 0.000
Next, I focused on predicting proportion target looking on each trial from target AOA, and then subsequently more complex models also including distractor AOA. The issue of generalizing across items is very similar to the case of frequency, likely for similar reasons.
Predict proportion target looking from target aoa controlling for age.
# Target AOA + age; random intercepts for administration and dataset only.
m3 <- lmer(
  prop_looking ~ target_aoa_centered + age_centered +
    (1 | administration_id) + (1 | dataset_name),
  data = acc_mod_freq_aoa
)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## prop_looking ~ target_aoa_centered + age_centered + (1 | administration_id) +
## (1 | dataset_name)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6766.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9840 -0.5794 0.1241 0.7634 2.3476
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006773 0.0823
## dataset_name (Intercept) 0.002275 0.0477
## Residual 0.079790 0.2825
## Number of obs: 18811, groups: administration_id, 1291; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.748e-01 1.586e-02 8.526e+00 42.558 3.13e-11 ***
## target_aoa_centered -1.749e-03 8.107e-04 9.659e+03 -2.158 0.031 *
## age_centered 5.568e-03 3.938e-04 1.283e+03 14.140 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trgt__
## trgt__cntrd -0.040
## age_centerd -0.035 -0.010
# m3 plus a random intercept for the target item.
m3A <- lmer(
  prop_looking ~ target_aoa_centered + age_centered +
    (1 | administration_id) + (1 | dataset_name) + (1 | target_label),
  data = acc_mod_freq_aoa
)
summary(m3A)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## prop_looking ~ target_aoa_centered + age_centered + (1 | administration_id) +
## (1 | dataset_name) + (1 | target_label)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6342.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0564 -0.5753 0.1225 0.7469 2.3184
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006970 0.08349
## target_label (Intercept) 0.006643 0.08150
## dataset_name (Intercept) 0.003183 0.05641
## Residual 0.077129 0.27772
## Number of obs: 18811, groups:
## administration_id, 1291; target_label, 100; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.715e-01 2.286e-02 1.595e+01 29.379 2.58e-15 ***
## target_aoa_centered -2.490e-03 3.214e-03 7.783e+01 -0.775 0.441
## age_centered 5.632e-03 3.956e-04 1.687e+03 14.236 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trgt__
## trgt__cntrd -0.363
## age_centerd -0.019 -0.027
# m3A refit on trials whose target label has more than 20 trials overall.
m3A_red <- lmer(
  prop_looking ~ target_aoa_centered + age_centered +
    (1 | administration_id) + (1 | dataset_name) + (1 | target_label),
  data = filter(acc_mod_freq_aoa, trial_count > 20)
)
summary(m3A_red)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## prop_looking ~ target_aoa_centered + age_centered + (1 | administration_id) +
## (1 | dataset_name) + (1 | target_label)
## Data: filter(acc_mod_freq_aoa, trial_count > 20)
##
## REML criterion at convergence: 6262.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0592 -0.5730 0.1227 0.7451 2.3175
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006989 0.08360
## target_label (Intercept) 0.005874 0.07664
## dataset_name (Intercept) 0.003095 0.05563
## Residual 0.077017 0.27752
## Number of obs: 18685, groups:
## administration_id, 1288; target_label, 79; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.745e-01 2.255e-02 1.607e+01 29.92 1.61e-15 ***
## target_aoa_centered -2.750e-03 3.162e-03 7.715e+01 -0.87 0.387
## age_centered 5.624e-03 3.954e-04 1.672e+03 14.22 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trgt__
## trgt__cntrd -0.349
## age_centerd -0.022 -0.028
We also considered an alternative analysis using relative AOA (the child's age minus the item's age of acquisition). While this is a strong predictor, that is likely due in large part to the strong relationship between age and proportion target looking.
# "Relative" AOA: the child's age minus the item's AOA, for target and
# distractor.
acc_mod_freq_aoa <- mutate(
  acc_mod_freq_aoa,
  relative_target_aoa = age - target_aoa,
  relative_distractor_aoa = age - distractor_aoa
)

# Trial-level target looking against relative target AOA, with the default
# smoother.
acc_mod_freq_aoa %>%
  ggplot(aes(x = relative_target_aoa, y = prop_looking)) +
  geom_point() +
  geom_smooth()
## relative target AOA as the only fixed effect
m_diff_target <- lmer(
  prop_looking ~ relative_target_aoa +
    (1 | administration_id) + (1 | dataset_name) +
    (1 | target_label) + (1 | distractor_label),
  data = acc_mod_freq_aoa
)
summary(m_diff_target)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ relative_target_aoa + (1 | administration_id) +
## (1 | dataset_name) + (1 | target_label) + (1 | distractor_label)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6278.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0273 -0.5789 0.1179 0.7450 2.3507
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006927 0.08323
## target_label (Intercept) 0.005379 0.07334
## distractor_label (Intercept) 0.003374 0.05809
## dataset_name (Intercept) 0.003045 0.05518
## Residual 0.076729 0.27700
## Number of obs: 18811, groups:
## administration_id, 1291; target_label, 100; distractor_label, 91; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.202e-01 2.258e-02 1.466e+01 27.46 5.09e-14 ***
## relative_target_aoa 5.615e-03 3.933e-04 1.848e+03 14.28 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## reltv_trgt_ -0.153
## relative distractor AOA as the only fixed effect
m_diff_distractor <- lmer(
  prop_looking ~ relative_distractor_aoa +
    (1 | administration_id) + (1 | dataset_name) +
    (1 | target_label) + (1 | distractor_label),
  data = acc_mod_freq_aoa
)
summary(m_diff_distractor)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ relative_distractor_aoa + (1 | administration_id) +
## (1 | dataset_name) + (1 | target_label) + (1 | distractor_label)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6285.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0265 -0.5795 0.1178 0.7455 2.3482
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006923 0.08321
## target_label (Intercept) 0.005616 0.07494
## distractor_label (Intercept) 0.003482 0.05901
## dataset_name (Intercept) 0.003105 0.05572
## Residual 0.076745 0.27703
## Number of obs: 18811, groups:
## administration_id, 1291; target_label, 100; distractor_label, 91; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.208e-01 2.284e-02 1.452e+01 27.18 7.32e-14 ***
## relative_distractor_aoa 5.515e-03 3.932e-04 1.845e+03 14.03 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## rltv_dstrc_ -0.151
## relative target and relative distractor AOA together
m_diff_td <- lmer(
  prop_looking ~ relative_target_aoa + relative_distractor_aoa +
    (1 | administration_id) + (1 | dataset_name) +
    (1 | target_label) + (1 | distractor_label),
  data = acc_mod_freq_aoa
)
summary(m_diff_td)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ relative_target_aoa + relative_distractor_aoa +
## (1 | administration_id) + (1 | dataset_name) + (1 | target_label) +
## (1 | distractor_label)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6287.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0255 -0.5790 0.1177 0.7450 2.3529
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006930 0.08325
## target_label (Intercept) 0.005552 0.07451
## distractor_label (Intercept) 0.003537 0.05947
## dataset_name (Intercept) 0.003012 0.05488
## Residual 0.076714 0.27697
## Number of obs: 18811, groups:
## administration_id, 1291; target_label, 100; distractor_label, 91; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.620420 0.022634 14.956755 27.411 3.36e-14 ***
## relative_target_aoa 0.008617 0.003056 92.403614 2.820 0.00588 **
## relative_distractor_aoa -0.003025 0.003054 92.167075 -0.991 0.32449
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) rltv_t_
## reltv_trgt_ -0.009
## rltv_dstrc_ -0.011 -0.992
# Target and distractor AOA + age; no by-item random effects.
m4 <- lmer(
  prop_looking ~ target_aoa_centered + distractor_aoa_centered +
    age_centered +
    (1 | administration_id) + (1 | dataset_name),
  data = acc_mod_freq_aoa
)
summary(m4)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ target_aoa_centered + distractor_aoa_centered +
## age_centered + (1 | administration_id) + (1 | dataset_name)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6774.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9697 -0.5811 0.1264 0.7644 2.3498
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006767 0.08226
## dataset_name (Intercept) 0.002288 0.04783
## Residual 0.079778 0.28245
## Number of obs: 18811, groups: administration_id, 1291; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.750e-01 1.590e-02 8.534e+00 42.459 3.13e-11 ***
## target_aoa_centered -8.108e-04 9.351e-04 1.620e+04 -0.867 0.386
## distractor_aoa_centered -1.813e-03 9.002e-04 1.716e+04 -2.014 0.044 *
## age_centered 5.570e-03 3.938e-04 1.292e+03 14.145 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trgt__ dstr__
## trgt__cntrd -0.032
## dstrctr__cn -0.006 -0.498
## age_centerd -0.035 -0.008 -0.002
# m4 plus random intercepts for the target and the distractor item.
m4A <- lmer(
  prop_looking ~ target_aoa_centered + distractor_aoa_centered +
    age_centered +
    (1 | administration_id) + (1 | dataset_name) +
    (1 | target_label) + (1 | distractor_label),
  data = acc_mod_freq_aoa
)
summary(m4A)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ target_aoa_centered + distractor_aoa_centered +
## age_centered + (1 | administration_id) + (1 | dataset_name) +
## (1 | target_label) + (1 | distractor_label)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6296.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0257 -0.5793 0.1177 0.7455 2.3478
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006933 0.08326
## target_label (Intercept) 0.005633 0.07505
## distractor_label (Intercept) 0.003544 0.05953
## dataset_name (Intercept) 0.002817 0.05308
## Residual 0.076714 0.27697
## Number of obs: 18811, groups:
## administration_id, 1291; target_label, 100; distractor_label, 91; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.664e-01 2.402e-02 1.990e+01 27.739 <2e-16 ***
## target_aoa_centered -6.959e-03 3.743e-03 6.968e+01 -1.859 0.0672 .
## distractor_aoa_centered 4.600e-03 3.642e-03 6.647e+01 1.263 0.2110
## age_centered 5.607e-03 3.943e-04 1.634e+03 14.222 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trgt__ dstr__
## trgt__cntrd -0.259
## dstrctr__cn -0.192 -0.373
## age_centerd -0.015 -0.024 -0.017
# m4A refit on trials whose target label has more than 20 trials overall.
m4A_red <- lmer(
  prop_looking ~ target_aoa_centered + distractor_aoa_centered +
    age_centered +
    (1 | administration_id) + (1 | dataset_name) +
    (1 | target_label) + (1 | distractor_label),
  data = filter(acc_mod_freq_aoa, trial_count > 20)
)
summary(m4A_red)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ target_aoa_centered + distractor_aoa_centered +
## age_centered + (1 | administration_id) + (1 | dataset_name) +
## (1 | target_label) + (1 | distractor_label)
## Data: filter(acc_mod_freq_aoa, trial_count > 20)
##
## REML criterion at convergence: 6216.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0264 -0.5785 0.1179 0.7453 2.3494
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006966 0.08346
## distractor_label (Intercept) 0.003534 0.05944
## target_label (Intercept) 0.004429 0.06655
## dataset_name (Intercept) 0.002646 0.05143
## Residual 0.076615 0.27679
## Number of obs: 18685, groups:
## administration_id, 1288; distractor_label, 87; target_label, 79; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.674e-01 2.321e-02 1.997e+01 28.750 <2e-16 ***
## target_aoa_centered -7.589e-03 3.718e-03 7.286e+01 -2.041 0.0449 *
## distractor_aoa_centered 5.117e-03 3.681e-03 6.649e+01 1.390 0.1691
## age_centered 5.604e-03 3.940e-04 1.592e+03 14.223 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trgt__ dstr__
## trgt__cntrd -0.205
## dstrctr__cn -0.215 -0.417
## age_centerd -0.019 -0.028 -0.014
Using Mike Frank’s suggestion for a Luce choice predictor (1/target_aoa)/(1/target_aoa+1/distractor_aoa)
# Luce choice ratio (on inverse AOA) + age; no by-item random effects.
m4B <- lmer(
  prop_looking ~ luce + age_centered +
    (1 | administration_id) + (1 | dataset_name),
  data = acc_mod_freq_aoa
)
summary(m4B)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ luce + age_centered + (1 | administration_id) +
## (1 | dataset_name)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6762
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9779 -0.5769 0.1258 0.7631 2.3433
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006783 0.08236
## dataset_name (Intercept) 0.002373 0.04871
## Residual 0.079802 0.28249
## Number of obs: 18811, groups: administration_id, 1291; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.942e-01 3.050e-02 1.079e+02 22.76 <2e-16 ***
## luce -4.148e-02 5.185e-02 1.813e+04 -0.80 0.424
## age_centered 5.561e-03 3.943e-04 1.327e+03 14.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) luce
## luce -0.848
## age_centerd -0.022 0.004
# Same fixed effects as m4B, adding random intercepts for the
# target and distractor labels (by-item terms).
m4C <- lmer(
  formula = prop_looking ~ luce + age_centered +
    (1 | administration_id) + (1 | dataset_name) +
    (1 | target_label) + (1 | distractor_label),
  data = acc_mod_freq_aoa
)
summary(m4C)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ luce + age_centered + (1 | administration_id) +
## (1 | dataset_name) + (1 | target_label) + (1 | distractor_label)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6279.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0254 -0.5784 0.1181 0.7454 2.3430
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006934 0.08327
## target_label (Intercept) 0.005631 0.07504
## distractor_label (Intercept) 0.003526 0.05938
## dataset_name (Intercept) 0.002596 0.05096
## Residual 0.076715 0.27698
## Number of obs: 18811, groups:
## administration_id, 1291; target_label, 100; distractor_label, 91; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.542e-01 1.181e-01 8.533e+01 3.846 0.000231 ***
## luce 4.108e-01 2.324e-01 8.470e+01 1.768 0.080688 .
## age_centered 5.595e-03 3.934e-04 1.498e+03 14.223 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) luce
## luce -0.983
## age_centerd -0.010 0.004
Finally, we combine all frequency and AOA predictors into a single model. Again, almost none of the putative item-level effects “survive” once by-item random effects are included (with the possible exception of target word AOA).
# Combined model: target/distractor log frequency and AOA plus age,
# with random intercepts for administration and dataset only
# (no by-item terms).
m5 <- lmer(
  formula = prop_looking ~
    target_log_freq_centered + distractor_log_freq_centered +
    target_aoa_centered + distractor_aoa_centered +
    age_centered +
    (1 | administration_id) + (1 | dataset_name),
  data = acc_mod_freq_aoa
)
summary(m5)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## prop_looking ~ target_log_freq_centered + distractor_log_freq_centered +
## target_aoa_centered + distractor_aoa_centered + age_centered +
## (1 | administration_id) + (1 | dataset_name)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6729.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9164 -0.5824 0.1306 0.7592 2.2934
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006802 0.08247
## dataset_name (Intercept) 0.002353 0.04851
## Residual 0.079485 0.28193
## Number of obs: 18811, groups: administration_id, 1291; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.748e-01 1.612e-02 8.599e+00 41.873 3.05e-11
## target_log_freq_centered 1.400e-02 2.031e-03 1.782e+04 6.894 5.59e-12
## distractor_log_freq_centered -1.217e-02 2.467e-03 1.812e+04 -4.932 8.23e-07
## target_aoa_centered 2.387e-03 1.021e-03 1.526e+04 2.337 0.019436
## distractor_aoa_centered -3.454e-03 1.039e-03 1.733e+04 -3.324 0.000891
## age_centered 5.550e-03 3.940e-04 1.332e+03 14.087 < 2e-16
##
## (Intercept) ***
## target_log_freq_centered ***
## distractor_log_freq_centered ***
## target_aoa_centered *
## distractor_aoa_centered ***
## age_centered ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trg___ dst___ trgt__ dstr__
## trgt_lg_fr_ -0.022
## dstrctr_l__ -0.030 -0.094
## trgt__cntrd -0.035 0.394 -0.133
## dstrctr__cn -0.023 0.080 0.486 -0.410
## age_centerd -0.034 -0.011 -0.005 -0.011 -0.005
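Among the item-level predictors, target AOA is the only effect that remains significant (and only marginally so, p ≈ .048) after accounting for non-independence between items (random intercepts for targets and distractors); the effect of age remains robust.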
# Combined model with the same fixed effects as m5, adding random
# intercepts for the target and distractor labels (by-item terms).
m5A <- lmer(
  formula = prop_looking ~
    target_log_freq_centered + distractor_log_freq_centered +
    target_aoa_centered + distractor_aoa_centered +
    age_centered +
    (1 | administration_id) + (1 | dataset_name) +
    (1 | target_label) + (1 | distractor_label),
  data = acc_mod_freq_aoa
)
summary(m5A)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## prop_looking ~ target_log_freq_centered + distractor_log_freq_centered +
## target_aoa_centered + distractor_aoa_centered + age_centered +
## (1 | administration_id) + (1 | dataset_name) + (1 | target_label) +
## (1 | distractor_label)
## Data: acc_mod_freq_aoa
##
## REML criterion at convergence: 6310.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0263 -0.5782 0.1193 0.7462 2.3468
##
## Random effects:
## Groups Name Variance Std.Dev.
## administration_id (Intercept) 0.006931 0.08325
## target_label (Intercept) 0.005924 0.07697
## distractor_label (Intercept) 0.003640 0.06033
## dataset_name (Intercept) 0.002836 0.05325
## Residual 0.076705 0.27696
## Number of obs: 18811, groups:
## administration_id, 1291; target_label, 100; distractor_label, 91; dataset_name, 10
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.660e-01 2.448e-02 2.066e+01 27.210 <2e-16
## target_log_freq_centered -7.509e-03 9.345e-03 7.476e+01 -0.804 0.4242
## distractor_log_freq_centered 1.376e-03 1.155e-02 5.874e+01 0.119 0.9056
## target_aoa_centered -8.406e-03 4.169e-03 7.033e+01 -2.016 0.0476
## distractor_aoa_centered 5.110e-03 4.478e-03 5.780e+01 1.141 0.2585
## age_centered 5.610e-03 3.943e-04 1.646e+03 14.227 <2e-16
##
## (Intercept) ***
## target_log_freq_centered
## distractor_log_freq_centered
## target_aoa_centered *
## distractor_aoa_centered
## age_centered ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trg___ dst___ trgt__ dstr__
## trgt_lg_fr_ 0.012
## dstrctr_l__ -0.127 -0.148
## trgt__cntrd -0.233 0.408 -0.077
## dstrctr__cn -0.229 -0.114 0.567 -0.335
## age_centerd -0.014 -0.009 -0.007 -0.026 -0.017
#vif.mer(m5A)