Preliminaries and data loading

library(peekbankr)
library(tidyverse)
library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library(lmerTest)

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(tictoc)
library(langcog)

## 
## Attaching package: 'langcog'

## The following object is masked from 'package:base':
## 
##     scale

library(here)

## here() starts at /Users/mcfrank/Projects/peekbank/peekbank-paper

t_range <- c(-1000,3000)
knitr::opts_chunk$set(cache = TRUE, warn = FALSE, message = FALSE)

get data

con <- connect_to_peekbank(dbname = "peekbank_dev")
datasets <- get_datasets(connection = con) %>% collect()
administrations <- get_administrations(connection = con) %>% collect()
tic()
aoi_timepoints <- get_aoi_timepoints(connection = con) %>% collect()
toc()

## 32.411 sec elapsed

stimuli <- get_stimuli(connection = con) %>% collect()
trial_types <- get_trial_types(connection = con) %>% collect()
trials <- get_trials(connection = con)  %>% collect()

aoi_data_joined <- aoi_timepoints %>%
  right_join(administrations) %>%
  right_join(trials) %>%
  right_join(trial_types) %>%
  right_join(datasets) %>%
  mutate(stimulus_id = target_id) %>%
  right_join(stimuli) %>%
  filter(t_norm > t_range[1],
         t_norm < t_range[2])

# save(file = "data/aoi_data_joined.Rds", aoi_data_joined)

Initial analysis

load cached data

# load(file = "data/aoi_data_joined.Rds")

subjects

subinfo <- aoi_data_joined %>%
  group_by(subject_id, dataset_id, lab_dataset_id, age) %>%
  summarise(trials = length(unique(trial_id)))

subinfo %>%
  ggplot(aes(x = age, fill = lab_dataset_id)) +
  geom_histogram(binwidth = 1) +
  theme_mikabr() +
  scale_fill_solarized(name = "Dataset") +
  xlab("Age (months)") +
  scale_x_continuous(breaks = c(12,24,36,48,60))

## Warning: Removed 14 rows containing non-finite values (stat_bin).

General Analyses

Time series

means <- aoi_data_joined %>%
  filter(age > 12, age <= 60) %>%
  mutate(age_binned = cut(age, seq(0,60,12))) %>%
  group_by(t_norm, dataset_name, age_binned, stimulus_novelty) %>%
  summarise(n = sum(aoi %in% c("target","distractor"), na.rm = TRUE), 
            p = sum(aoi == "target", na.rm = TRUE),
            prop_looking = p / n, 
            ci_lower = binom::binom.confint(p, n, method = "bayes")$lower,
            ci_upper = binom::binom.confint(p, n, method = "bayes")$upper) 

ggplot(means, 
       aes(x = t_norm, y = prop_looking)) + 
  geom_line(aes(col = dataset_name)) + 
  geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper, 
                  fill = dataset_name), alpha = .5) +
  facet_grid(age_binned~stimulus_novelty) +
  geom_hline(yintercept = .5, lty = 2) + 
  geom_vline(xintercept = 0, lty = 2) +
  ylab("Proportion Target Looking") +
  xlab("Time (msec)") +
  theme_classic() +
  scale_color_solarized() +
  scale_fill_solarized()

Familiar trials.

window_min <- 300
window_max <- 2300

familiar_means <- aoi_data_joined %>%
  filter(age > 12, age <= 24, 
         stimulus_novelty == "familiar") %>%
  filter(t_norm > window_min, t_norm < window_max) %>%
  group_by(administration_id, trial_id, english_stimulus_label) %>%
  summarise(n = sum(aoi %in% c("target","distractor"), na.rm = TRUE), 
            p = sum(aoi == "target", na.rm = TRUE),
            prop_looking = p / n) %>%
  group_by(english_stimulus_label) %>%
  summarise(mean_prop = mean(prop_looking, na.rm=TRUE), 
            sd_prop = sd(prop_looking, na.rm=TRUE), 
            n = n(),
            ci = 1.96 * sd_prop / sqrt(n)) %>%
  mutate(significant = (mean_prop - ci > .5 & mean_prop + ci > .5) |
           (mean_prop - ci < .5 & mean_prop + ci < .5)) %>%
  ungroup %>%
  mutate(sorted_stimulus_label = fct_reorder(as.factor(english_stimulus_label), 
                                             mean_prop, mean))

ggplot(familiar_means, 
       aes(y = sorted_stimulus_label, x = mean_prop)) + 
  ggstance::geom_linerangeh(aes(xmin = mean_prop - ci, xmax = mean_prop + ci)) + 
  geom_point(aes(shape = significant), size = 2) + 
  scale_shape_manual(values = c(1, 20)) + 
  geom_vline(xintercept = .5, lty = 2) + 
  theme_mikabr()

Reaction time analysis.

SAMPLING_RATE <- 40

rts <- aoi_data_joined %>%
  # filter(!(dataset_name %in% "adams_marchman_2018")) %>%
  group_by(administration_id, trial_id, age, english_stimulus_label, 
           dataset_name, stimulus_novelty, trial_order) %>%
  filter(!all(aoi == "missing"), !all(t_norm < 0)) %>%
  mutate(crit_onset_aoi = aoi[t_norm == 0], 
         fst_shift_land_aoi = rle(aoi[t_norm >= 0])$values[3],
         shift_type = case_when(
           crit_onset_aoi == "distractor" & fst_shift_land_aoi == "target" ~ "D-T",
           crit_onset_aoi == "distractor" & fst_shift_land_aoi == "other" ~ "D-O",
           crit_onset_aoi == "target" & fst_shift_land_aoi == "distractor" ~ "T-D",
           crit_onset_aoi == "target" & fst_shift_land_aoi == "other" ~ "T-O",
           TRUE ~ "other shift")) %>%
  summarise(rt_value = rle(aoi[t_norm >= 0])$lengths[1] * SAMPLING_RATE,
            fst_shift_gap_ms = rle(aoi[t_norm >= 0])$lengths[2] * SAMPLING_RATE) %>%
  group_by(dataset_name) %>%
  filter(rt_value < max(rt_value)-100, !is.na(rt_value)) # note sloppy exclusion of bad RTs (no shift trials)

Descriptive histograms.

ggplot(rts, 
       aes(x = rt_value)) + 
  geom_histogram() + 
  facet_wrap(~dataset_name, scales = "free_y")

Logs? This is cool as it suggests that the < 300 ms RTs really are junk.

ggplot(rts, 
       aes(x = rt_value)) + 
  geom_histogram() + 
  scale_x_log10() + 
  facet_wrap(~dataset_name, scales = "free_y")

RT by item?

rt_means <- rts %>%
  filter(age > 6, age < 66) %>%
  mutate(age_binned = cut(age, seq(6,66,12))) %>%
  group_by(administration_id, trial_id, english_stimulus_label, age_binned) %>%
  summarise(sub_mean_rt = mean(rt_value)) %>%
  group_by(english_stimulus_label, age_binned) %>%
  summarise(mean_rt = mean(sub_mean_rt), 
            sd_rt = sd(sub_mean_rt), 
            n = n(),
            ci = 1.96 * sd_rt / sqrt(n)) %>%
  filter(n > 10) %>%
  mutate(significant = (mean_rt - ci > .5 & mean_rt + ci > .5) |
           (mean_rt - ci < .5 & mean_rt + ci < .5)) %>%
  # group_by(age_binned) %>%
  mutate(sorted_stimulus_label = fct_reorder(as.factor(english_stimulus_label), 
                                             mean_rt, mean))

ggplot(rt_means, 
       aes(y = sorted_stimulus_label, x = mean_rt)) + 
  ggstance::geom_linerangeh(aes(xmin = mean_rt - ci, xmax = mean_rt + ci)) + 
  geom_point(aes(col = age_binned), size = 2) + 
  scale_shape_manual(values = c(1, 20)) + 
  geom_vline(xintercept = .5, lty = 2) + 
  theme_mikabr()

Overall RT curve across age.

rt_subs <- rts %>%
  filter(stimulus_novelty == "familiar", rt_value > 300, rt_value < 3000) %>%
  group_by(administration_id, trial_id, english_stimulus_label, age) %>%
  summarise(sub_mean_rt = mean(rt_value)) 


ggplot(rt_subs, 
       aes(x = age, y = sub_mean_rt)) + 
  geom_jitter(alpha = .03, width = .3, height = 0) +
  geom_smooth() + 
  scale_y_log10() +
  xlim(12,60) + 
  theme_mikabr()

## Warning: Removed 228 rows containing non-finite values (stat_smooth).

## Warning: Removed 297 rows containing missing values (geom_point).

Overall RT curve across trial order

rt_subs <- rts %>%
  filter(stimulus_novelty == "familiar", rt_value > 300, rt_value < 3000) %>%
  group_by(administration_id, trial_id, english_stimulus_label, dataset_name, 
           trial_order) %>%
  summarise(sub_mean_rt = mean(rt_value)) 


ggplot(rt_subs, 
       aes(x = trial_order, y = sub_mean_rt)) + 
  geom_point(alpha = .1) +
  geom_smooth(method = "lm") + 
  scale_y_log10() + 
  xlim(12,60) + 
  theme_mikabr()

## Warning: Removed 5354 rows containing non-finite values (stat_smooth).

## Warning: Removed 5354 rows containing missing values (geom_point).

rt_subs <- rts %>%
  filter(stimulus_novelty == "familiar", rt_value > 300, rt_value < 3000) %>%
  group_by(administration_id, trial_id, english_stimulus_label, age, dataset_name) %>%
  summarise(sub_mean_rt = mean(rt_value)) %>%
  group_by(english_stimulus_label) %>%
  filter(n() > 100) 

ggplot(rt_subs, 
       aes(x = age, y = sub_mean_rt, col = dataset_name)) + 
  geom_point(alpha = .1) +
  geom_smooth(method = "lm", formula = y ~ x) + 
  facet_wrap(~english_stimulus_label) + 
  ylim(0,4000) +
  theme_mikabr()

## Warning: Removed 79 rows containing non-finite values (stat_smooth).

## Warning: Removed 79 rows containing missing values (geom_point).

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

Models

Create dataframe.

rt_model_data <- rts %>%
  filter(rt_value > 300) %>%
  mutate(log_rt = log(rt_value), 
         age_centered  = scale(age, scale = FALSE))

Sanity check the lm model with no random effects.

summary(lm(log_rt ~ age_centered * stimulus_novelty + age_centered * trial_order, 
           data = rt_model_data))

## 
## Call:
## lm(formula = log_rt ~ age_centered * stimulus_novelty + age_centered * 
##     trial_order, data = rt_model_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.49238 -0.49623 -0.00881  0.51165  1.55089 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         7.081e+00  1.043e-02 679.052  < 2e-16 ***
## age_centered                       -4.426e-03  1.178e-03  -3.756 0.000173 ***
## stimulus_noveltynovel              -5.428e-02  1.218e-02  -4.457 8.36e-06 ***
## trial_order                         2.868e-03  5.871e-04   4.886 1.04e-06 ***
## age_centered:stimulus_noveltynovel  3.487e-03  1.078e-03   3.236 0.001215 ** 
## age_centered:trial_order           -1.377e-04  9.535e-05  -1.444 0.148823    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6812 on 17406 degrees of freedom
##   (184 observations deleted due to missingness)
## Multiple R-squared:  0.006905,   Adjusted R-squared:  0.00662 
## F-statistic: 24.21 on 5 and 17406 DF,  p-value: < 2.2e-16

Go for it.

rt_mod <- lmer(log_rt ~ age_centered * stimulus_novelty + 
                 age_centered * trial_order + 
                 (1 | english_stimulus_label) + (1 | dataset_name), 
               data = rt_model_data) 

summary(rt_mod)$coef %>%
  knitr::kable(digits = 3)

	Estimate	Std. Error	df	t value	Pr(>\|t\|)
(Intercept)	7.097	0.041	7.819	174.277	0.000
age_centered	-0.005	0.001	17369.014	-3.946	0.000
stimulus_noveltynovel	0.055	0.022	12.058	2.476	0.029
trial_order	0.001	0.001	16115.894	1.676	0.094
age_centered:stimulus_noveltynovel	0.004	0.001	17363.013	3.304	0.001
age_centered:trial_order	0.000	0.000	17374.935	-1.240	0.215

Accuracy

Exploration

Start by summarizing.

t_min <- 300
t_max <- 2300

by_trial_means <- aoi_data_joined %>%
  filter(age > 12, age <= 60, stimulus_novelty == "familiar",
         t_norm > t_min, t_norm < t_max) %>%
  mutate(age_binned = cut(age, seq(12,60,12))) %>%
  rename(target_label = english_stimulus_label) %>%
  group_by(administration_id, trial_id, target_label, distractor_id, 
           age, age_binned) %>%
  summarise(prop_looking = sum(aoi == "target", na.rm = TRUE) / 
              (sum(aoi == "target", na.rm=TRUE) + 
                 sum(aoi=="distractor", na.rm=TRUE)),
            prop_missing = mean(aoi == "missing", na.rm = TRUE)) %>%
  left_join(stimuli, by = c("distractor_id" = "stimulus_id")) %>%
  rename(distractor_label = english_stimulus_label)

Missingness.

hist(by_trial_means$prop_missing)

Preliminary item analysis.

item_means <- by_trial_means %>%
  group_by(target_label, age_binned) %>%
  summarise(mean_prop = mean(prop_looking), 
            sd_prop = sd(prop_looking), 
            ci = 1.96 * sd_prop / sqrt(n()))

ggplot(item_means, aes(x = mean_prop, y = target_label, col = age_binned)) +
  ggstance::geom_pointrangeh(aes(xmin = mean_prop - ci, xmax = mean_prop + ci)) +
  geom_vline(xintercept = .5, lty = 2) +
  xlim(0,1) +
  theme_mikabr()

## Warning: Removed 99 rows containing missing values (geom_pointrangeh).

Models

Make some models!

acc_mod_data <- by_trial_means %>%
  filter(prop_missing < .3) 

acc_mod_data$age_centered <- acc_mod_data$age - mean(acc_mod_data$age)


m1 <- lmer(prop_looking ~ age_centered
           + (1| administration_id) + (1 | target_label),
           data = acc_mod_data)

summary(m1)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop_looking ~ age_centered + (1 | administration_id) + (1 |  
##     target_label)
##    Data: acc_mod_data
## 
## REML criterion at convergence: 5864.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7883 -0.6100  0.1299  0.7793  2.3271 
## 
## Random effects:
##  Groups            Name        Variance Std.Dev.
##  administration_id (Intercept) 0.005283 0.07269 
##  target_label      (Intercept) 0.005629 0.07503 
##  Residual                      0.082712 0.28760 
## Number of obs: 14792, groups:  administration_id, 1097; target_label, 115
## 
## Fixed effects:
##               Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  6.173e-01  1.060e-02 8.065e+01   58.22   <2e-16 ***
## age_centered 4.336e-03  3.452e-04 2.943e+03   12.56   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## age_centerd -0.056

add age slopes!

m2 <- lmer(prop_looking ~ age_centered 
           + (1 | administration_id) + (age_centered || target_label),
           data = acc_mod_data)

summary(m2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## prop_looking ~ age_centered + (1 | administration_id) + (age_centered ||  
##     target_label)
##    Data: acc_mod_data
## 
## REML criterion at convergence: 5835.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7976 -0.6119  0.1265  0.7795  2.3444 
## 
## Random effects:
##  Groups            Name         Variance  Std.Dev.
##  administration_id (Intercept)  5.275e-03 0.072627
##  target_label      (Intercept)  5.770e-03 0.075957
##  target_label.1    age_centered 1.531e-05 0.003913
##  Residual                       8.231e-02 0.286896
## Number of obs: 14792, groups:  administration_id, 1097; target_label, 115
## 
## Fixed effects:
##               Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  6.125e-01  1.090e-02 8.326e+01  56.170  < 2e-16 ***
## age_centered 3.296e-03  9.317e-04 3.245e+01   3.537  0.00124 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## age_centerd 0.062

distractors

m3 <- lmer(prop_looking ~ age_centered
           + (1 | administration_id) + (age_centered || target_label) +
             + (1 | distractor_label),
           data = acc_mod_data)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00466064 (tol = 0.002, component 1)

summary(m3)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## prop_looking ~ age_centered + (1 | administration_id) + (age_centered ||  
##     target_label) + +(1 | distractor_label)
##    Data: acc_mod_data
## 
## REML criterion at convergence: 5779.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8153 -0.6092  0.1304  0.7718  2.3441 
## 
## Random effects:
##  Groups            Name         Variance  Std.Dev.
##  administration_id (Intercept)  5.190e-03 0.072042
##  target_label      (Intercept)  5.426e-03 0.073660
##  target_label.1    age_centered 6.594e-06 0.002568
##  distractor_label  (Intercept)  3.552e-03 0.059600
##  Residual                       8.178e-02 0.285975
## Number of obs: 14792, groups:  
## administration_id, 1097; target_label, 115; distractor_label, 115
## 
## Fixed effects:
##               Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  6.114e-01  1.315e-02 8.418e+01  46.482  < 2e-16 ***
## age_centered 4.205e-03  7.742e-04 1.800e+01   5.432 3.68e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## age_centerd 0.012 
## convergence code: 0
## Model failed to converge with max|grad| = 0.00466064 (tol = 0.002, component 1)

age for distractors

m4 <- lmer(prop_looking ~ age_centered
           + (1 | administration_id) + (age_centered || target_label) +
             + (age_centered || distractor_label),
           data = acc_mod_data)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0157822 (tol = 0.002, component 1)

summary(m4)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## prop_looking ~ age_centered + (1 | administration_id) + (age_centered ||  
##     target_label) + +(age_centered || distractor_label)
##    Data: acc_mod_data
## 
## REML criterion at convergence: 5779.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8151 -0.6094  0.1303  0.7717  2.3444 
## 
## Random effects:
##  Groups             Name         Variance  Std.Dev. 
##  administration_id  (Intercept)  5.192e-03 0.0720562
##  target_label       (Intercept)  5.448e-03 0.0738096
##  target_label.1     age_centered 6.498e-06 0.0025491
##  distractor_label   (Intercept)  3.541e-03 0.0595093
##  distractor_label.1 age_centered 8.861e-08 0.0002977
##  Residual                        8.178e-02 0.2859714
## Number of obs: 14792, groups:  
## administration_id, 1097; target_label, 115; distractor_label, 115
## 
## Fixed effects:
##               Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  6.114e-01  1.316e-02 8.398e+01  46.461  < 2e-16 ***
## age_centered 4.196e-03  7.754e-04 1.742e+01   5.412 4.29e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## age_centerd 0.012 
## convergence code: 0
## Model failed to converge with max|grad| = 0.0157822 (tol = 0.002, component 1)

Accuracy distractor analysis

Question: do distractors matter in word recognition? Here’s the idea: test between a no-distractor model and a distractor-informed model.

Let’s assume each target \(T\) and distractor \(D\) has a recognizability score, with lower being more difficult.

No-distractor model: target accuracy should be about target difficulty \(T\).

Distractor model: target accuracy should be a luce choice of \(T / (T + D)\).

Two plausible models of recognizability would be 1) childes frequency data and 2) wordbank age of acquisition data.

Let’s start with the wordbank data because it’s easy.

We’ll use Wordbank AOAs estimated in the wordbank book with a bayesian GLM.

Larger AOAs are harder, that’s the opposite of recognizability. Let’s try using inverse AoA.

aoas <- read_csv(here("data/bglm_aoas_english.csv"))

aoa_data <- left_join(acc_mod_data, 
                      aoas %>%
                        transmute(target_label = definition, 
                                  target_aoa = bglm_aoa, 
                                  target_category = category)) %>%
  left_join(aoas %>%
              transmute(distractor_label = definition, 
                        distractor_aoa = bglm_aoa)) %>%
  filter(!is.na(target_aoa), !is.na(distractor_aoa)) %>%
  ungroup() %>%
  mutate(target_aoa_centered = target_aoa - mean(target_aoa), 
         distractor_aoa_centered = distractor_aoa - mean(distractor_aoa))

aoa_data <- aoa_data %>%
  mutate(inverse_target = 1/target_aoa,
         inverse_distractor = 1/distractor_aoa,
         luce = inverse_target / (inverse_target + inverse_distractor))

m1 <- lmer(prop_looking ~ luce + (1|administration_id) + (1|dataset_name), 
           data = aoa_data) 
  
summary(m1)$coef %>%
  knitr::kable(digits = 3)

	Estimate	Std. Error	df	t value	Pr(>\|t\|)
(Intercept)	0.718	0.035	205.467	20.572	0.000
luce	-0.126	0.063	8991.056	-1.984	0.047

m2 <- lmer(prop_looking ~ target_aoa + (1|administration_id) + (1|dataset_name), 
           data = aoa_data)

summary(m2)$coef %>%
  knitr::kable(digits = 3)

	Estimate	Std. Error	df	t value	Pr(>\|t\|)
(Intercept)	0.665	0.024	42.548	27.973	0.000
target_aoa	-0.001	0.001	6189.088	-0.513	0.608

m3 <- lmer(prop_looking ~ inverse_target + (1|administration_id) + 
             (1|dataset_name), 
           data = aoa_data)

summary(m3)$coef %>%
  knitr::kable(digits = 3)

	Estimate	Std. Error	df	t value	Pr(>\|t\|)
(Intercept)	0.664	0.023	36.232	28.784	0.000
inverse_target	-0.156	0.296	7121.531	-0.525	0.599

Add age.

m1_age <- lmer(prop_looking ~ luce * age + (1 | administration_id) + 
                 (1 | dataset_name), 
           data = aoa_data)

summary(m1_age)$coef %>%
  knitr::kable(digits = 3)

	Estimate	Std. Error	df	t value	Pr(>\|t\|)
(Intercept)	0.310	0.083	2784.277	3.743	0.000
luce	0.402	0.162	9122.254	2.490	0.013
age	0.017	0.003	9311.413	5.255	0.000
luce:age	-0.023	0.007	9184.104	-3.545	0.000

And visualize.

ggplot(aoa_data %>%
         mutate(luce_binned = cut(luce, quantile(luce, c(0,.25, .5, .75, 1)), 
                                  include.lowest = TRUE)), 
       aes(x = age, y = prop_looking, col = luce_binned)) + 
  geom_jitter(alpha = .1, width = .2) + 
  geom_smooth(method = "lm")

## Warning: Removed 2 rows containing non-finite values (stat_smooth).

## Warning: Removed 2 rows containing missing values (geom_point).

Peekbank Exploration

peekbank team

12/10/2020

Preliminaries and data loading

Initial analysis

subjects

General Analyses

Reaction time analysis.

Models

Accuracy

Exploration

Models

Accuracy distractor analysis