Preliminaries and data loading

library(peekbankr)
library(tidyverse)
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(tictoc)
library(langcog)
## 
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
## 
##     scale
library(here)
## here() starts at /Users/mcfrank/Projects/peekbank/peekbank-paper
t_range <- c(-1000,3000)
knitr::opts_chunk$set(cache = TRUE, warning = FALSE, message = FALSE)

get data

con <- connect_to_peekbank(dbname = "peekbank_dev")
datasets <- get_datasets(connection = con) %>% collect()
administrations <- get_administrations(connection = con) %>% collect()
tic()
aoi_timepoints <- get_aoi_timepoints(connection = con) %>% collect()
toc()
## 32.411 sec elapsed
stimuli <- get_stimuli(connection = con) %>% collect()
trial_types <- get_trial_types(connection = con) %>% collect()
trials <- get_trials(connection = con)  %>% collect()

# join all tables; joins rely on the shared peekbank ID columns, and stimuli
# are joined on the target's stimulus_id so stimulus fields describe the target
aoi_data_joined <- aoi_timepoints %>%
  right_join(administrations) %>%
  right_join(trials) %>%
  right_join(trial_types) %>%
  right_join(datasets) %>%
  mutate(stimulus_id = target_id) %>%
  right_join(stimuli) %>%
  filter(t_norm > t_range[1],
         t_norm < t_range[2])

# save(file = "data/aoi_data_joined.Rds", aoi_data_joined)

Initial analysis

load cached data

# load(file = "data/aoi_data_joined.Rds")

subjects

subinfo <- aoi_data_joined %>%
  group_by(subject_id, dataset_id, lab_dataset_id, age) %>%
  summarise(trials = n_distinct(trial_id))

subinfo %>%
  ggplot(aes(x = age, fill = lab_dataset_id)) +
  geom_histogram(binwidth = 1) +
  theme_mikabr() +
  scale_fill_solarized(name = "Dataset") +
  xlab("Age (months)") +
  scale_x_continuous(breaks = c(12,24,36,48,60))
## Warning: Removed 14 rows containing non-finite values (stat_bin).

General Analyses

Time series

means <- aoi_data_joined %>%
  filter(age > 12, age <= 60) %>%
  mutate(age_binned = cut(age, seq(0,60,12))) %>%
  group_by(t_norm, dataset_name, age_binned, stimulus_novelty) %>%
  summarise(n = sum(aoi %in% c("target","distractor"), na.rm = TRUE), 
            p = sum(aoi == "target", na.rm = TRUE),
            prop_looking = p / n, 
            ci_lower = binom::binom.confint(p, n, method = "bayes")$lower,
            ci_upper = binom::binom.confint(p, n, method = "bayes")$upper) 

ggplot(means, 
       aes(x = t_norm, y = prop_looking)) + 
  geom_line(aes(col = dataset_name)) + 
  geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper, 
                  fill = dataset_name), alpha = .5) +
  facet_grid(age_binned~stimulus_novelty) +
  geom_hline(yintercept = .5, lty = 2) + 
  geom_vline(xintercept = 0, lty = 2) +
  ylab("Proportion Target Looking") +
  xlab("Time (msec)") +
  theme_classic() +
  scale_color_solarized() +
  scale_fill_solarized() 

Familiar trials.

window_min <- 300
window_max <- 2300

familiar_means <- aoi_data_joined %>%
  filter(age > 12, age <= 24, 
         stimulus_novelty == "familiar") %>%
  filter(t_norm > window_min, t_norm < window_max) %>%
  group_by(administration_id, trial_id, english_stimulus_label) %>%
  summarise(n = sum(aoi %in% c("target","distractor"), na.rm = TRUE), 
            p = sum(aoi == "target", na.rm = TRUE),
            prop_looking = p / n) %>%
  group_by(english_stimulus_label) %>%
  summarise(mean_prop = mean(prop_looking, na.rm=TRUE), 
            sd_prop = sd(prop_looking, na.rm=TRUE), 
            n = n(),
            ci = 1.96 * sd_prop / sqrt(n)) %>%
  # flag items whose 95% CI excludes chance (0.5)
  mutate(significant = (mean_prop - ci > .5 & mean_prop + ci > .5) |
           (mean_prop - ci < .5 & mean_prop + ci < .5)) %>%
  ungroup() %>%
  mutate(sorted_stimulus_label = fct_reorder(as.factor(english_stimulus_label), 
                                             mean_prop, mean))

ggplot(familiar_means, 
       aes(y = sorted_stimulus_label, x = mean_prop)) + 
  ggstance::geom_linerangeh(aes(xmin = mean_prop - ci, xmax = mean_prop + ci)) + 
  geom_point(aes(shape = significant), size = 2) + 
  scale_shape_manual(values = c(1, 20)) + 
  geom_vline(xintercept = .5, lty = 2) + 
  theme_mikabr()

Reaction time analysis.

SAMPLING_RATE <- 40  # used below as the number of ms per resampled time bin

rts <- aoi_data_joined %>%
  # filter(!(dataset_name %in% "adams_marchman_2018")) %>%
  group_by(administration_id, trial_id, age, english_stimulus_label, 
           dataset_name, stimulus_novelty, trial_order) %>%
  filter(!all(aoi == "missing"), !all(t_norm < 0)) %>%
  # run-length encode the post-onset AOI sequence: run 1 is the AOI at target
  # onset, run 2 is treated as the gap during the shift, and run 3 as the AOI
  # where the shift lands
  mutate(crit_onset_aoi = aoi[t_norm == 0], 
         fst_shift_land_aoi = rle(aoi[t_norm >= 0])$values[3],
         shift_type = case_when(
           crit_onset_aoi == "distractor" & fst_shift_land_aoi == "target" ~ "D-T",
           crit_onset_aoi == "distractor" & fst_shift_land_aoi == "other" ~ "D-O",
           crit_onset_aoi == "target" & fst_shift_land_aoi == "distractor" ~ "T-D",
           crit_onset_aoi == "target" & fst_shift_land_aoi == "other" ~ "T-O",
           TRUE ~ "other shift")) %>%
  # RT = duration of the first run, i.e. time to shift away from the onset AOI
  summarise(rt_value = rle(aoi[t_norm >= 0])$lengths[1] * SAMPLING_RATE,
            fst_shift_gap_ms = rle(aoi[t_norm >= 0])$lengths[2] * SAMPLING_RATE) %>%
  group_by(dataset_name) %>%
  filter(rt_value < max(rt_value)-100, !is.na(rt_value)) # note sloppy exclusion of bad RTs (no shift trials)
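
To make the rle() logic concrete, here is a toy example. The AOI sequence is hypothetical (not from the data), and the 40 ms bin width is the same assumption used above.

toy_aoi <- c("distractor", "distractor", "distractor", "missing", "target", "target")
runs <- rle(toy_aoi)
runs$lengths[1] * SAMPLING_RATE  # RT: 3 samples on the onset AOI = 120 ms
runs$lengths[2] * SAMPLING_RATE  # shift gap: 1 missing sample = 40 ms
runs$values[3]                   # landing AOI after the shift: "target"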

Descriptive histograms.

ggplot(rts, 
       aes(x = rt_value)) + 
  geom_histogram() + 
  facet_wrap(~dataset_name, scales = "free_y") 

Log scale? This is useful, as it suggests that the < 300 ms RTs really are junk.

ggplot(rts, 
       aes(x = rt_value)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~dataset_name, scales = "free_y") 

RT by item?

rt_means <- rts %>%
  filter(age > 6, age < 66) %>%
  mutate(age_binned = cut(age, seq(6,66,12))) %>%
  group_by(administration_id, trial_id, english_stimulus_label, age_binned) %>%
  summarise(sub_mean_rt = mean(rt_value)) %>%
  group_by(english_stimulus_label, age_binned) %>%
  summarise(mean_rt = mean(sub_mean_rt), 
            sd_rt = sd(sub_mean_rt), 
            n = n(),
            ci = 1.96 * sd_rt / sqrt(n)) %>%
  filter(n > 10) %>%
  # group_by(age_binned) %>%
  mutate(sorted_stimulus_label = fct_reorder(as.factor(english_stimulus_label), 
                                             mean_rt, mean))

ggplot(rt_means, 
       aes(y = sorted_stimulus_label, x = mean_rt)) + 
  ggstance::geom_linerangeh(aes(xmin = mean_rt - ci, xmax = mean_rt + ci)) + 
  geom_point(aes(col = age_binned), size = 2) + 
  theme_mikabr()

Overall RT curve across age.

rt_subs <- rts %>%
  filter(stimulus_novelty == "familiar", rt_value > 300, rt_value < 3000) %>%
  group_by(administration_id, trial_id, english_stimulus_label, age) %>%
  summarise(sub_mean_rt = mean(rt_value)) 


ggplot(rt_subs, 
       aes(x = age, y = sub_mean_rt)) + 
  geom_jitter(alpha = .03, width = .3, height = 0) +
  geom_smooth() + 
  scale_y_log10() +
  xlim(12,60) + 
  theme_mikabr() 
## Warning: Removed 228 rows containing non-finite values (stat_smooth).
## Warning: Removed 297 rows containing missing values (geom_point).

Overall RT curve across trial order

rt_subs <- rts %>%
  filter(stimulus_novelty == "familiar", rt_value > 300, rt_value < 3000) %>%
  group_by(administration_id, trial_id, english_stimulus_label, dataset_name, 
           trial_order) %>%
  summarise(sub_mean_rt = mean(rt_value)) 


ggplot(rt_subs, 
       aes(x = trial_order, y = sub_mean_rt)) + 
  geom_point(alpha = .1) +
  geom_smooth(method = "lm") + 
  scale_y_log10() + 
  theme_mikabr() 

rt_subs <- rts %>%
  filter(stimulus_novelty == "familiar", rt_value > 300, rt_value < 3000) %>%
  group_by(administration_id, trial_id, english_stimulus_label, age, dataset_name) %>%
  summarise(sub_mean_rt = mean(rt_value)) %>%
  group_by(english_stimulus_label) %>%
  filter(n() > 100) 

ggplot(rt_subs, 
       aes(x = age, y = sub_mean_rt, col = dataset_name)) + 
  geom_point(alpha = .1) +
  geom_smooth(method = "lm", formula = y ~ x) + 
  facet_wrap(~english_stimulus_label) + 
  ylim(0,4000) +
  theme_mikabr()
## Warning: Removed 79 rows containing non-finite values (stat_smooth).
## Warning: Removed 79 rows containing missing values (geom_point).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

Models

Create dataframe.

rt_model_data <- rts %>%
  filter(rt_value > 300) %>%
  mutate(log_rt = log(rt_value), 
         age_centered  = scale(age, scale = FALSE))

Sanity check the lm model with no random effects.

summary(lm(log_rt ~ age_centered * stimulus_novelty + age_centered * trial_order, 
           data = rt_model_data))
## 
## Call:
## lm(formula = log_rt ~ age_centered * stimulus_novelty + age_centered * 
##     trial_order, data = rt_model_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.49238 -0.49623 -0.00881  0.51165  1.55089 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         7.081e+00  1.043e-02 679.052  < 2e-16 ***
## age_centered                       -4.426e-03  1.178e-03  -3.756 0.000173 ***
## stimulus_noveltynovel              -5.428e-02  1.218e-02  -4.457 8.36e-06 ***
## trial_order                         2.868e-03  5.871e-04   4.886 1.04e-06 ***
## age_centered:stimulus_noveltynovel  3.487e-03  1.078e-03   3.236 0.001215 ** 
## age_centered:trial_order           -1.377e-04  9.535e-05  -1.444 0.148823    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6812 on 17406 degrees of freedom
##   (184 observations deleted due to missingness)
## Multiple R-squared:  0.006905,   Adjusted R-squared:  0.00662 
## F-statistic: 24.21 on 5 and 17406 DF,  p-value: < 2.2e-16

Go for it.

rt_mod <- lmer(log_rt ~ age_centered * stimulus_novelty + 
                 age_centered * trial_order + 
                 (1 | english_stimulus_label) + (1 | dataset_name), 
               data = rt_model_data) 

summary(rt_mod)$coef %>%
  knitr::kable(digits = 3)
                                     Estimate  Std. Error         df  t value  Pr(>|t|)
(Intercept)                             7.097       0.041      7.819  174.277     0.000
age_centered                           -0.005       0.001  17369.014   -3.946     0.000
stimulus_noveltynovel                   0.055       0.022     12.058    2.476     0.029
trial_order                             0.001       0.001  16115.894    1.676     0.094
age_centered:stimulus_noveltynovel      0.004       0.001  17363.013    3.304     0.001
age_centered:trial_order                0.000       0.000  17374.935   -1.240     0.215

Accuracy

Exploration

Start by summarizing.

t_min <- 300
t_max <- 2300

by_trial_means <- aoi_data_joined %>%
  filter(age > 12, age <= 60, stimulus_novelty == "familiar",
         t_norm > t_min, t_norm < t_max) %>%
  mutate(age_binned = cut(age, seq(12,60,12))) %>%
  rename(target_label = english_stimulus_label) %>%
  group_by(administration_id, trial_id, target_label, distractor_id, 
           age, age_binned) %>%
  summarise(prop_looking = sum(aoi == "target", na.rm = TRUE) / 
              (sum(aoi == "target", na.rm=TRUE) + 
                 sum(aoi=="distractor", na.rm=TRUE)),
            prop_missing = mean(aoi == "missing", na.rm = TRUE)) %>%
  left_join(stimuli, by = c("distractor_id" = "stimulus_id")) %>%
  rename(distractor_label = english_stimulus_label)

Missingness.

hist(by_trial_means$prop_missing)

Preliminary item analysis.

item_means <- by_trial_means %>%
  group_by(target_label, age_binned) %>%
  summarise(mean_prop = mean(prop_looking), 
            sd_prop = sd(prop_looking), 
            ci = 1.96 * sd_prop / sqrt(n()))

ggplot(item_means, aes(x = mean_prop, y = target_label, col = age_binned)) +
  ggstance::geom_pointrangeh(aes(xmin = mean_prop - ci, xmax = mean_prop + ci)) +
  geom_vline(xintercept = .5, lty = 2) +
  xlim(0,1) +
  theme_mikabr()
## Warning: Removed 99 rows containing missing values (geom_pointrangeh).

Models

Make some models!

acc_mod_data <- by_trial_means %>%
  filter(prop_missing < .3) 

acc_mod_data$age_centered <- acc_mod_data$age - mean(acc_mod_data$age)


m1 <- lmer(prop_looking ~ age_centered
           + (1| administration_id) + (1 | target_label),
           data = acc_mod_data)

summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ age_centered + (1 | administration_id) + (1 |  
##     target_label)
##    Data: acc_mod_data
## 
## REML criterion at convergence: 5864.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7883 -0.6100  0.1299  0.7793  2.3271 
## 
## Random effects:
##  Groups            Name        Variance Std.Dev.
##  administration_id (Intercept) 0.005283 0.07269 
##  target_label      (Intercept) 0.005629 0.07503 
##  Residual                      0.082712 0.28760 
## Number of obs: 14792, groups:  administration_id, 1097; target_label, 115
## 
## Fixed effects:
##               Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  6.173e-01  1.060e-02 8.065e+01   58.22   <2e-16 ***
## age_centered 4.336e-03  3.452e-04 2.943e+03   12.56   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## age_centerd -0.056

add age slopes!

m2 <- lmer(prop_looking ~ age_centered 
           + (1 | administration_id) + (age_centered || target_label),
           data = acc_mod_data)

summary(m2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## prop_looking ~ age_centered + (1 | administration_id) + (age_centered ||  
##     target_label)
##    Data: acc_mod_data
## 
## REML criterion at convergence: 5835.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7976 -0.6119  0.1265  0.7795  2.3444 
## 
## Random effects:
##  Groups            Name         Variance  Std.Dev.
##  administration_id (Intercept)  5.275e-03 0.072627
##  target_label      (Intercept)  5.770e-03 0.075957
##  target_label.1    age_centered 1.531e-05 0.003913
##  Residual                       8.231e-02 0.286896
## Number of obs: 14792, groups:  administration_id, 1097; target_label, 115
## 
## Fixed effects:
##               Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  6.125e-01  1.090e-02 8.326e+01  56.170  < 2e-16 ***
## age_centered 3.296e-03  9.317e-04 3.245e+01   3.537  0.00124 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## age_centerd 0.062

distractors

m3 <- lmer(prop_looking ~ age_centered
           + (1 | administration_id) + (age_centered || target_label)
           + (1 | distractor_label),
           data = acc_mod_data)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00466064 (tol = 0.002, component 1)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## prop_looking ~ age_centered + (1 | administration_id) + (age_centered ||  
##     target_label) + (1 | distractor_label)
##    Data: acc_mod_data
## 
## REML criterion at convergence: 5779.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8153 -0.6092  0.1304  0.7718  2.3441 
## 
## Random effects:
##  Groups            Name         Variance  Std.Dev.
##  administration_id (Intercept)  5.190e-03 0.072042
##  target_label      (Intercept)  5.426e-03 0.073660
##  target_label.1    age_centered 6.594e-06 0.002568
##  distractor_label  (Intercept)  3.552e-03 0.059600
##  Residual                       8.178e-02 0.285975
## Number of obs: 14792, groups:  
## administration_id, 1097; target_label, 115; distractor_label, 115
## 
## Fixed effects:
##               Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  6.114e-01  1.315e-02 8.418e+01  46.482  < 2e-16 ***
## age_centered 4.205e-03  7.742e-04 1.800e+01   5.432 3.68e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## age_centerd 0.012 
## convergence code: 0
## Model failed to converge with max|grad| = 0.00466064 (tol = 0.002, component 1)

age for distractors

m4 <- lmer(prop_looking ~ age_centered
           + (1 | administration_id) + (age_centered || target_label)
           + (age_centered || distractor_label),
           data = acc_mod_data)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0157822 (tol = 0.002, component 1)
summary(m4)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## prop_looking ~ age_centered + (1 | administration_id) + (age_centered ||  
##     target_label) + (age_centered || distractor_label)
##    Data: acc_mod_data
## 
## REML criterion at convergence: 5779.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8151 -0.6094  0.1303  0.7717  2.3444 
## 
## Random effects:
##  Groups             Name         Variance  Std.Dev. 
##  administration_id  (Intercept)  5.192e-03 0.0720562
##  target_label       (Intercept)  5.448e-03 0.0738096
##  target_label.1     age_centered 6.498e-06 0.0025491
##  distractor_label   (Intercept)  3.541e-03 0.0595093
##  distractor_label.1 age_centered 8.861e-08 0.0002977
##  Residual                        8.178e-02 0.2859714
## Number of obs: 14792, groups:  
## administration_id, 1097; target_label, 115; distractor_label, 115
## 
## Fixed effects:
##               Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  6.114e-01  1.316e-02 8.398e+01  46.461  < 2e-16 ***
## age_centered 4.196e-03  7.754e-04 1.742e+01   5.412 4.29e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## age_centerd 0.012 
## convergence code: 0
## Model failed to converge with max|grad| = 0.0157822 (tol = 0.002, component 1)
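
Both m3 and m4 produce convergence warnings. As a sketch (not run above, and assuming the default fit is otherwise sound), a standard first remedy is to switch lme4's optimizer and raise the iteration limit via lmerControl:

# Hedged sketch: refit m4 with the bobyqa optimizer and a larger iteration
# budget, a common response to "Model failed to converge" warnings.
m4_refit <- lmer(prop_looking ~ age_centered
                 + (1 | administration_id) + (age_centered || target_label)
                 + (age_centered || distractor_label),
                 data = acc_mod_data,
                 control = lmerControl(optimizer = "bobyqa",
                                       optCtrl = list(maxfun = 1e5)))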

Accuracy distractor analysis

Question: do distractors matter in word recognition? Here’s the idea: test between a no-distractor model and a distractor-informed model.

Let’s assume each target \(T\) and distractor \(D\) has a recognizability score, with lower scores indicating harder items.

No-distractor model: target accuracy should depend only on target recognizability \(T\).

Distractor model: target accuracy should follow a Luce choice rule, \(T / (T + D)\).

Two plausible sources of recognizability scores are 1) CHILDES frequency data and 2) Wordbank age-of-acquisition (AoA) data.

Let’s start with the Wordbank data because it’s easy.

We’ll use Wordbank AoAs estimated in the Wordbank book with a Bayesian GLM.

Larger AoAs mean harder words, which is the opposite of recognizability, so let’s try using inverse AoA.
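
Concretely, with inverse AoA as the recognizability score, the distractor-informed (Luce choice) prediction for a trial is

\[
p(\mathrm{target}) = \frac{1/\mathrm{AoA}_T}{1/\mathrm{AoA}_T + 1/\mathrm{AoA}_D},
\]

which corresponds to the luce variable computed in the code below.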

aoas <- read_csv(here("data/bglm_aoas_english.csv"))

aoa_data <- left_join(acc_mod_data, 
                      aoas %>%
                        transmute(target_label = definition, 
                                  target_aoa = bglm_aoa, 
                                  target_category = category)) %>%
  left_join(aoas %>%
              transmute(distractor_label = definition, 
                        distractor_aoa = bglm_aoa)) %>%
  filter(!is.na(target_aoa), !is.na(distractor_aoa)) %>%
  ungroup() %>%
  mutate(target_aoa_centered = target_aoa - mean(target_aoa), 
         distractor_aoa_centered = distractor_aoa - mean(distractor_aoa))
aoa_data <- aoa_data %>%
  mutate(inverse_target = 1/target_aoa,
         inverse_distractor = 1/distractor_aoa,
         luce = inverse_target / (inverse_target + inverse_distractor))
m1 <- lmer(prop_looking ~ luce + (1|administration_id) + (1|dataset_name), 
           data = aoa_data) 
  
summary(m1)$coef %>%
  knitr::kable(digits = 3)
             Estimate  Std. Error        df  t value  Pr(>|t|)
(Intercept)     0.718       0.035   205.467   20.572     0.000
luce           -0.126       0.063  8991.056   -1.984     0.047

m2 <- lmer(prop_looking ~ target_aoa + (1|administration_id) + (1|dataset_name), 
           data = aoa_data)

summary(m2)$coef %>%
  knitr::kable(digits = 3)
             Estimate  Std. Error        df  t value  Pr(>|t|)
(Intercept)     0.665       0.024    42.548   27.973     0.000
target_aoa     -0.001       0.001  6189.088   -0.513     0.608

m3 <- lmer(prop_looking ~ inverse_target + (1|administration_id) + 
             (1|dataset_name), 
           data = aoa_data)

summary(m3)$coef %>%
  knitr::kable(digits = 3)
                Estimate  Std. Error        df  t value  Pr(>|t|)
(Intercept)        0.664       0.023    36.232   28.784     0.000
inverse_target    -0.156       0.296  7121.531   -0.525     0.599
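
The framing above calls for a test between the no-distractor and distractor-informed models, which these separate fits do not give directly. One sketch of such a comparison (not run here): refit the Luce-choice and target-only models with ML (REML fits with different fixed effects are not comparable) and compare information criteria.

# Hedged sketch: ML refits of the distractor-informed (luce) and
# no-distractor (inverse_target) models, compared by AIC.
m_luce_ml <- lmer(prop_looking ~ luce + (1 | administration_id) + (1 | dataset_name),
                  data = aoa_data, REML = FALSE)
m_target_ml <- lmer(prop_looking ~ inverse_target + (1 | administration_id) + (1 | dataset_name),
                    data = aoa_data, REML = FALSE)
AIC(m_luce_ml, m_target_ml)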

Add age.

m1_age <- lmer(prop_looking ~ luce * age + (1 | administration_id) + 
                 (1 | dataset_name), 
           data = aoa_data)

summary(m1_age)$coef %>%
  knitr::kable(digits = 3)
             Estimate  Std. Error        df  t value  Pr(>|t|)
(Intercept)     0.310       0.083  2784.277    3.743     0.000
luce            0.402       0.162  9122.254    2.490     0.013
age             0.017       0.003  9311.413    5.255     0.000
luce:age       -0.023       0.007  9184.104   -3.545     0.000

And visualize.

ggplot(aoa_data %>%
         mutate(luce_binned = cut(luce, quantile(luce, c(0,.25, .5, .75, 1)), 
                                  include.lowest = TRUE)), 
       aes(x = age, y = prop_looking, col = luce_binned)) + 
  geom_jitter(alpha = .1, width = .2) + 
  geom_smooth(method = "lm")
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).