Peekbank similarity analyses

Author

Tarun Sepuri

Published

June 12, 2026

library(tidyverse)
source("helpers.R")
library(ggrepel)
library(cowplot)
library(here)
library(colorspace)
library(ggeffects)

Import data

# Read the trial-level data (with similarity columns) and the two metadata
# tables that are merged onto it below.
usable_trials_summarized_with_sims <- read.csv(
  here("data/usable_trials_with_similarities.csv")
)
aoa_ratings <- read.csv(
  here("data/metadata/level-aoaratings_type-kuperman_data.csv")
)
saliency_values <- read.csv(
  here("data/metadata/level-imagepair_added-saliency_data.csv")
)

usable_trials_summarized_with_sims <- usable_trials_summarized_with_sims |>
  # No `by` is supplied here, so dplyr joins on every column name the two
  # tables share (presumably just `text1` -- check the join message).
  left_join(transmute(aoa_ratings, text1 = Word, aoa = AoA_Kup_lem)) |>
  left_join(saliency_values, by = join_by(unique_pair == ImagePair)) |>
  mutate(
    # Finer bins: steps of 6 up to 36, then steps of 12.
    age_bucket_bigger = cut(
      age,
      breaks = c(12, 18, 24, 30, 36, 48, 60),
      labels = c("(12-18]", "(18-24]", "(24-30]", "(30-36]", "(36-48]", "(48-60]"),
      include.lowest = TRUE
    ),
    # Coarser bins: steps of 12 throughout.
    age_bucket = cut(
      age,
      breaks = c(12, 24, 36, 48, 60),
      labels = c("(12-24]", "(24-36]", "(36-48]", "(48-60]"),
      include.lowest = TRUE
    )
  )

# Snap every participant's age to the nearest multiple of 5.
age_based_trials <- mutate(
  usable_trials_summarized_with_sims,
  rounded_age = round_to_nearest(age, round_to = 5)
)

# Collapsed summaries from the helper. `sim_logit` is the logit of image
# similarity after clamping it to [1e-6, 1 - 1e-6] so qlogis() stays finite.
clip_data_summarized <- usable_trials_summarized_with_sims |>
  summarize_similarity_data_collapsed(
    extra_fields = c("dataset_name", "vanilla_trial", "aoa", "MeanSaliencyDiff")
  ) |>
  mutate(sim_logit = qlogis(pmax(pmin(image_similarity, 1 - 1e-6), 1e-6)))

# Same summary, additionally keeping the coarse age bucket as a field.
clip_data_summarized_age_bucket <- usable_trials_summarized_with_sims |>
  summarize_similarity_data_collapsed(
    extra_fields = c(
      "dataset_name", "vanilla_trial", "aoa", "MeanSaliencyDiff", "age_bucket"
    )
  ) |>
  mutate(sim_logit = qlogis(pmax(pmin(image_similarity, 1 - 1e-6), 1e-6)))

# Rows flagged as vanilla trials only.
vanilla <- filter(clip_data_summarized, vanilla_trial == 1)

# Column names of the four similarity measures used throughout.
sims <- c(
  "image_similarity",
  "text_similarity",
  "multimodal_similarity",
  "ooo_similarity"
)
# The same names with a "scaled_" prefix.
sims_scaled <- paste0("scaled_", sims)
# Facet strip labels, keyed by similarity column name.
facet_labs <- c(
  text_similarity = "Text similarity",
  image_similarity = "Image similarity",
  multimodal_similarity = "Multimodal similarity",
  ooo_similarity = "Adult behavioral similarity"
)

downloaded_aois <- readRDS(here("intermediates/downloaded_aois.Rds"))

# Horizontal bar chart: number of summarized rows per 0.05-wide image
# similarity bin (bins are left-closed; the last bin also includes 1).
clip_data_summarized |>
  count(
    sim_bucket = cut(
      image_similarity,
      breaks = seq(0, 1, by = 0.05),
      right = FALSE,
      include.lowest = TRUE
    )
  ) |>
  ggplot(aes(n, sim_bucket)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Distribution of image similarity",
    x = "Count",
    y = "Image similarity bucket"
  )

Main similarity plots

CLIP analysis: the current similarity effects are dubious, with a lot of dataset-level variance that will have to be accounted for in a mixed-effects model. The Garrison Bergelson dataset has individualized trials, so it is also difficult to make use of.

# N = the number of participants in a single trial here
# Adams & Marchman (2018): vanilla trials with more than 10 participants.
adams_marchman_data_summarized <- usable_trials_summarized_with_sims |>
  summarize_similarity_data_collapsed(
    extra_fields = c("dataset_name", "vanilla_trial")
  ) |>
  filter(N > 10, dataset_name == "adams_marchman_2018", vanilla_trial == 1)
am_plots <- generate_multimodal_plots(
  adams_marchman_data_summarized,
  "CLIP",
  title = "Adams & Marchman, 2018"
)
am_plots

# Weaver & Zettersten (2024): same filter.
weaver_zettersten_data_summarized <- usable_trials_summarized_with_sims |>
  summarize_similarity_data_collapsed(
    extra_fields = c("dataset_name", "vanilla_trial")
  ) |>
  filter(N > 10, dataset_name == "weaver_zettersten_2024", vanilla_trial == 1)
wz_plots <- generate_multimodal_plots(
  weaver_zettersten_data_summarized,
  "CLIP",
  title = "Weaver et al., 2024"
)
wz_plots

# All vanilla trials, pooled across datasets.
clip_plots <- generate_multimodal_plots(
  filter(clip_data_summarized, vanilla_trial == 1),
  "CLIP",
  title = "all vanilla trials"
)
clip_plots

# Same, restricted to rows with N of at least 5 and with size_by_n switched on.
generate_multimodal_plots(
  filter(vanilla, N >= 5),
  "CLIP",
  title = "all vanilla trials (N >= 5)",
  size_by_n = TRUE,
  base_size = 16
)

more multimodal plot visualizations

# Pooled vanilla trials.
generate_multimodal_plots(
  vanilla,
  "CLIP",
  title = "all vanilla trials",
  size_by_n = TRUE,
  base_size = 16
)

# Grouped by dataset, with an overall line requested on top.
generate_multimodal_plots(
  vanilla,
  "CLIP",
  title = "by dataset",
  group_var = "dataset_name",
  se_alpha = 0.2,
  point_alpha = 0.1,
  show_legend = TRUE,
  line_alpha = 0.4,
  show_cor = FALSE,
  overall_line = TRUE,
  overall_alpha = 0.7,
  base_size = 16
)

# Vanilla vs. non-vanilla trials.
generate_multimodal_plots(
  clip_data_summarized,
  "CLIP",
  title = "different trial types",
  group_var = "vanilla_trial",
  point_alpha = 0.1,
  show_se = TRUE,
  base_size = 16
)

# Grouped by the coarse age bucket (vanilla trials, known bucket, N >= 5).
generate_multimodal_plots(
  filter(
    clip_data_summarized_age_bucket,
    vanilla_trial == 1, !is.na(age_bucket), N >= 5
  ),
  "CLIP",
  title = "different age buckets (N>=5)",
  group_var = "age_bucket",
  show_se = TRUE,
  se_alpha = 0.2,
  point_alpha = 0.1,
  line_alpha = 0.8,
  line_width = 1.1,
  base_size = 16
)

# Same summary as above but keeping the finer age bucket as a field.
clip_data_summarized_age_bucket_bigger <- usable_trials_summarized_with_sims |>
  summarize_similarity_data_collapsed(
    extra_fields = c(
      "dataset_name", "vanilla_trial", "aoa", "MeanSaliencyDiff",
      "age_bucket_bigger"
    )
  ) |>
  mutate(sim_logit = qlogis(pmax(pmin(image_similarity, 1 - 1e-6), 1e-6)))

generate_multimodal_plots(
  filter(
    clip_data_summarized_age_bucket_bigger,
    vanilla_trial == 1, !is.na(age_bucket_bigger), N >= 5
  ),
  "CLIP",
  title = "different larger age buckets (N>=5)",
  group_var = "age_bucket_bigger",
  show_se = TRUE,
  se_alpha = 0.2,
  point_alpha = 0.1,
  line_alpha = 0.8,
  line_width = 1.1,
  base_size = 16
)

By age

# Long format: one row per summarized item and similarity measure, so that
# each measure can be drawn in its own facet.
plot_data_age <- clip_data_summarized_age_bucket |>
  pivot_longer(all_of(sims), names_to = "sim_type", values_to = "similarity") |>
  mutate(sim_type = factor(sim_type, levels = sims))

# Looking proportion against similarity, one linear fit per age bucket.
ggplot(
  plot_data_age |> filter(!is.na(age_bucket)),
  aes(
    similarity,
    mean_value,
    colour = age_bucket,
    fill = age_bucket
  )
) +
  geom_hline(yintercept = 0.5, linetype = "dashed") +
  geom_point(alpha = 0.1, size = 2) +
  # First pass draws the fit with its SE ribbon; `fill` is inherited from the
  # plot-level aes(), and `alpha` sets the ribbon transparency.
  stat_smooth(
    method = "lm",
    se = TRUE,
    alpha = 0.15
  ) +
  # Second pass redraws the fitted line on top, thicker and without a ribbon.
  stat_smooth(
    method = "lm",
    se = FALSE,
    linewidth = 1.2
  ) +
  facet_wrap(
    ~ sim_type,
    nrow = 2,
    scales = "free_x",
    labeller = labeller(sim_type = facet_labs),
    strip.position = "top"
  ) +
  # Use the same palette for colour and fill so each ribbon matches its line;
  # previously only colour was set and the ribbons fell back to default hues.
  scale_color_discrete_qualitative(palette = "Set 3") +
  scale_fill_discrete_qualitative(palette = "Set 3") +
  labs(
    x = "Target–distractor similarity",
    y = "Proportion target looking in critical window",
    colour = "Age bucket"
  ) +
  guides(fill = "none") +
  theme_classic(base_size = 16) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(size = 20),
    panel.spacing = unit(1, "lines"),
    plot.margin = margin(t = 5.5, r = 30, b = 5.5, l = 5.5, unit = "pt")
  )

comparing all IVs

library(GGally)
# Standardize the outcome and all predictors for vanilla trials; `[, 1]`
# pulls the single column out of the matrix that scale() returns.
confusability_vars <- clip_data_summarized |>
  filter(vanilla_trial == 1) |>
  transmute(
    across(
      c(
        mean_value,
        image_similarity,
        text_similarity,
        multimodal_similarity,
        ooo_similarity,
        aoa,
        MeanSaliencyDiff
      ),
      \(col) scale(col)[, 1]
    )
  )

# Pairs plot: Spearman correlations above the diagonal, linear smooths below,
# densities on the diagonal.
p <- ggpairs(
  confusability_vars,
  upper = list(
    continuous = wrap(
      "cor",
      method = "spearman",
      use = "pairwise.complete.obs",
      size = 3
    )
  ),
  lower = list(
    continuous = wrap("smooth", method = "lm", alpha = 0.4, size = 0.8)
  ),
  diag = list(continuous = wrap("densityDiag", alpha = 0.5))
) +
  theme_bw()

p

So many interesting collinear effects here with AoA and mean saliency.

# Load required libraries
library(ggcorrplot)
library(Hmisc) # Required for computing p-values

# 1. Standardize the predictors for vanilla trials and give each one its
#    display name directly. The outcome `mean_value` is not included here.
#    To relabel a predictor, change the name on the left-hand side.
confusability_vars <- clip_data_summarized |>
  filter(vanilla_trial == 1) |>
  transmute(
    `Image similarity` = scale(image_similarity)[, 1],
    `Text similarity` = scale(text_similarity)[, 1],
    `Multimodal similarity` = scale(multimodal_similarity)[, 1],
    `Adult behavioral similarity` = scale(ooo_similarity)[, 1],
    `Estimated target age of acquisition` = scale(aoa)[, 1],
    `Saliency difference` = scale(MeanSaliencyDiff)[, 1]
  )

# 2. Spearman correlations; the result's `r` and `P` matrices are used below.
cor_data <- rcorr(as.matrix(confusability_vars), type = "spearman")

# 3. Reshape the correlation and p-value matrices to long form: one row per
#    (Var1, Var2) pair.
r_df <- cor_data$r |>
  as.data.frame() |>
  rownames_to_column("Var1") |>
  pivot_longer(!Var1, names_to = "Var2", values_to = "r")

p_df <- cor_data$P |>
  as.data.frame() |>
  rownames_to_column("Var1") |>
  pivot_longer(!Var1, names_to = "Var2", values_to = "p")

# Factor levels follow the column order of the predictor table.
vars_order <- colnames(confusability_vars)

# 4. Join r and p, build the "r + stars" labels, and keep only the lower
#    triangle (the integer-code comparison drops the diagonal and the upper
#    half).
plot_data <- r_df |>
  left_join(p_df, by = c("Var1", "Var2")) |>
  mutate(
    # Diagonal p-values are NA; set them to 1 so they never earn stars.
    p = if_else(Var1 == Var2, 1, p),
    stars = case_when(
      p < 0.001 ~ "***",
      p < 0.01 ~ "**",
      p < 0.05 ~ "*",
      .default = ""
    ),
    label_text = paste0(round(r, 2), stars),
    Var1 = factor(Var1, levels = vars_order),
    Var2 = factor(Var2, levels = vars_order)
  ) |>
  filter(as.integer(Var1) > as.integer(Var2))

# 5. Build the heatmap: one tile per predictor pair, filled by Spearman r and
#    annotated with the rounded r plus significance stars.
ggplot(plot_data, aes(x = Var2, y = Var1, fill = r)) +
  geom_tile(color = "white") +
  geom_text(aes(label = label_text), size = 3.5, color = "black") +
  # Diverging palette centred on 0. The argument is spelled `limits` in full;
  # the previous `limit` only worked through partial argument matching.
  scale_fill_gradient2(
    low = "#6D9EC1", mid = "white", high = "#E46726",
    midpoint = 0, limits = c(-1, 1), space = "Lab",
    name = "Spearman\ncorrelation"
  ) +
  theme_bw() +
  labs(title = "Correlation of looking behavior predictors", x = NULL, y = NULL) +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank()
  )

# Writes the most recently created plot (the heatmap above).
ggsave(here("figures/correlation.png"))

Analysis across age

library(dplyr)
#' Pearson correlation between two columns, computed within groups.
#'
#' @param data A data frame containing `x_var`, `y_var` and `group_var`.
#' @param x_var,y_var Column names (strings) of the two variables to correlate.
#' @param group_var Character vector of grouping column names.
#' @param conf_level Confidence level passed to `cor.test()`.
#' @return One row per group with `pearson_cor`, `p_value`, `ci_lower` and
#'   `ci_upper`. Groups with fewer than 3 complete (x, y) pairs get `NA` in
#'   all four columns instead of aborting the whole call; groups with exactly
#'   3 pairs get an estimate and p-value but `NA` interval bounds, because
#'   `cor.test()` reports no interval in that case.
calculate_correlations <- function(data, x_var, y_var, group_var = c("rounded_age"), conf_level = 0.95) {
  data |>
    dplyr::group_by(across(all_of(group_var))) |>
    dplyr::summarize(
      {
        x_vals <- .data[[x_var]]
        y_vals <- .data[[y_var]]
        n_pairs <- sum(stats::complete.cases(x_vals, y_vals))
        if (n_pairs < 3L) {
          # cor.test() stops with "not enough finite observations" here.
          tibble(
            pearson_cor = NA_real_,
            p_value = NA_real_,
            ci_lower = NA_real_,
            ci_upper = NA_real_
          )
        } else {
          cor_test <- cor.test(x_vals, y_vals, method = "pearson", conf.level = conf_level)
          ci <- cor_test$conf.int
          if (is.null(ci)) {
            ci <- c(NA_real_, NA_real_)
          }
          tibble(
            pearson_cor = unname(cor_test$estimate),
            p_value = cor_test$p.value,
            ci_lower = ci[1],
            ci_upper = ci[2]
          )
        }
      },
      .groups = "drop"
    )
}

#' Plot per-age correlations, one colour per similarity type.
#'
#' @param data A data frame with columns `rounded_age`, `pearson_cor`,
#'   `ci_lower`, `ci_upper` and `similarity_type`.
#' @return A ggplot object.
sim_age_plot <- function(data) {
  # Points and error bars share the same dodge so they stay aligned.
  dodge <- position_dodge(width = 0.5)

  ggplot(data, aes(rounded_age, pearson_cor, colour = similarity_type)) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    geom_point(position = dodge, size = 3) +
    geom_errorbar(
      aes(ymin = ci_lower, ymax = ci_upper),
      position = dodge,
      width = 0.3,
      alpha = 0.2
    ) +
    # Smoothed trend per similarity type, without a confidence band.
    geom_smooth(se = FALSE, span = 2, alpha = 0.1) +
    scale_x_continuous(breaks = seq(5, 70, 5)) +
    scale_color_brewer(name = "Similarity type", palette = "Set1") +
    guides(shape = "none") +
    labs(
      title = "Similarity correlations across age",
      x = "Age",
      y = "Coefficient of similarity"
    ) +
    theme_minimal()
}

# can't figure out what values to filter to here?
# Current choice: summarized rows with N >= 5, and only ages that retain at
# least 5 such rows.
clip_data_age_summarized <- age_based_trials |>
  filter(vanilla_trial == 1) |>
  summarize_similarity_data_collapsed(
    extra_fields = c("rounded_age", "dataset_name")
  ) |>
  filter(N >= 5) |>
  group_by(rounded_age) |>
  filter(n() >= 5) |>
  ungroup()

# Per-age correlation of one similarity column with `mean_value`, tagged with
# a short label for the similarity type.
age_cors_for <- function(sim_column, label) {
  calculate_correlations(clip_data_age_summarized, sim_column, "mean_value") |>
    mutate(similarity_type = label)
}

clip_age_image_cors <- age_cors_for("image_similarity", "image")
clip_age_text_cors <- age_cors_for("text_similarity", "text")
clip_age_multimodal_cors <- age_cors_for("multimodal_similarity", "multimodal")
clip_age_ooo_cors <- age_cors_for("ooo_similarity", "ooo")

clip_age_cors <- bind_rows(
  clip_age_image_cors,
  clip_age_text_cors,
  clip_age_multimodal_cors,
  clip_age_ooo_cors
)
sim_age_plot(clip_age_cors)

# Image-similarity correlations only. `clip_age_cors` stacks all four
# similarity types, so it has to be filtered to match the plot title;
# otherwise the points and the smooth mix every similarity type.
clip_age_cors |>
  filter(similarity_type == "image") |>
  ggplot(aes(x = rounded_age, y = pearson_cor)) +
  # Black points are significant at p < 0.05, gray ones are not.
  geom_point(aes(color = p_value < 0.05), size = 3) +
  geom_smooth(span = 2) +
  labs(title = "Image similarity correlation across age",
       x = "Age in months",
       y = "Pearson Correlation") +
  scale_color_manual(values = c("TRUE" = "black", "FALSE" = "gray")) +
  theme_minimal() +
  theme(legend.position = "none")

time

library(tidyr)
# Normal quantile used for the 95% intervals below.
crit <- qnorm(0.975)

# One row per (trial type, similarity dimension), labelled "high" or "low" by
# a median split within that dimension. Values equal to the median count as
# "high".
sims_long <- usable_trials_summarized_with_sims |>
  filter(vanilla_trial == 1) |>
  select(trial_type_id, ends_with("_similarity")) |>
  distinct() |>
  pivot_longer(
    cols = ends_with("_similarity"),
    names_to = "sim_type",
    values_to = "similarity"
  ) |>
  filter(!is.na(similarity)) |>
  mutate(
    sim_split = if_else(
      similarity >= median(similarity, na.rm = TRUE),
      "high",
      "low"
    ),
    .by = sim_type
  ) |>
  # Shorten e.g. "image_similarity" to "image".
  mutate(sim_type = str_remove(sim_type, "_similarity$"))

# Timepoints with a known accuracy, restricted to trial types that have at
# least one similarity value.
aoi_base <- filter(
  downloaded_aois,
  !is.na(accuracy) & trial_type_id %in% sims_long$trial_type_id
)

# Fan out: every timepoint is repeated once per similarity dimension it has a
# split for (hence the declared many-to-many join).
aoi_split <- left_join(
  aoi_base,
  sims_long,
  by = "trial_type_id",
  relationship = "many-to-many"
)

# First average within administration per condition x time, then average
# across administrations and attach a normal-approximation 95% half-width.
across_split <- aoi_split |>
  group_by(sim_type, sim_split, administration_id, t_norm) |>
  summarise(mean_accuracy = mean(accuracy, na.rm = TRUE), .groups = "drop") |>
  group_by(sim_type, sim_split, t_norm) |>
  summarise(
    accuracy = mean(mean_accuracy, na.rm = TRUE),
    ci = crit * sd(mean_accuracy, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  ) |>
  mutate(condition = paste(sim_type, sim_split, sep = " "))

summarize_across_subj_aois <- readRDS(here("intermediates/summarize_across_subj_aois.Rds"))

# Stack the split curves with the precomputed overall curve (added as the 9th
# condition, "Overall") and move "Overall" to the front of the factor levels.
plot_df <- bind_rows(
  select(across_split, t_norm, accuracy, ci, condition),
  summarize_across_subj_aois |>
    mutate(condition = "Overall") |>
    select(t_norm, accuracy, ci, condition)
) |>
  mutate(condition = forcats::fct_relevel(condition, "Overall"))

# Time course of target looking per condition: faint per-timepoint error bars
# plus a GAM-smoothed line. Reference lines mark word onset (0 ms), 300 ms,
# and chance (0.5).
looking_times <- ggplot(plot_df, aes(t_norm, accuracy,
                                      colour = condition, fill = condition)) +
  xlim(-2000, 4000) +
  geom_errorbar(aes(ymin = accuracy - ci, ymax = accuracy + ci),
                width = 0, alpha = 0.15) +
  geom_smooth(method = "gam", se = FALSE, linewidth = 1) +
  # Line thickness is set with `linewidth`; `size` is deprecated for line
  # geoms and the rest of this file already uses `linewidth`.
  geom_vline(xintercept = 0, linewidth = 1.5) +
  geom_hline(yintercept = 0.5, linewidth = 1.2, linetype = "dashed") +
  geom_vline(xintercept = 300, linetype = "dotted") +
  ylim(0, 1) +
  xlab("Time (normalized to target word onset) in ms") +
  ylab("Proportion Target Looking") +
  labs(colour = "Condition", fill = "Condition")
looking_times

stats

pre-registered models

library(lmerTest)
library(glmmTMB)
library(MuMIn)
library(broom.mixed)
# Modelling data: vanilla trials only.
model_data <- filter(usable_trials_summarized_with_sims, vanilla_trial == 1)

# Add standardized copies of the response and predictors next to the
# originals (the raw columns stay available for plotting); as.vector() drops
# the matrix wrapper that scale() returns.
model_data_scaled <- mutate(
  model_data,
  scaled_response = as.vector(scale(mean_target_looking_critical_window)),
  scaled_age = as.vector(scale(age)),
  scaled_aoa = as.vector(scale(aoa)),
  scaled_saliency_diff = as.vector(scale(MeanSaliencyDiff)),
  # One "scaled_<name>" column per similarity measure.
  across(
    all_of(sims),
    \(col) as.vector(scale(col)),
    .names = "scaled_{.col}"
  )
)

#' Fit the main mixed model for one similarity measure.
#'
#' Fixed effects: similarity x age interaction, AoA and saliency difference
#' (all wrapped in `scale()`). Random intercepts for target label by image
#' key and for dataset, plus a by-subject term.
#'
#' @param sim Name of the similarity column.
#' @param data Data frame to fit on.
#' @param response Name of the response column (wrapped in `scale()`).
#' @param added_structure Optional character vector of extra formula terms.
#' @param pruned_model If `TRUE`, subjects get a random intercept only;
#'   otherwise they also get a random slope for the similarity measure.
#' @return The fitted `lmer()` model.
fit_main <- function(sim,
                     data = model_data,
                     response = "mean_target_looking_critical_window",
                     added_structure = NULL, pruned_model=FALSE) {
  subject_term <- if (pruned_model) {
    "(1 | subject_id)"
  } else {
    sprintf("(1 + scale(%s) | subject_id)", sim)
  }

  # c() drops a NULL `added_structure`, so no explicit check is needed.
  rhs <- c(
    sprintf("scale(%s)*scale(age)", sim),
    "scale(aoa)",
    "scale(MeanSaliencyDiff)",
    "(1 | original_target_label:img_key)",
    "(1 | dataset_id)",
    subject_term,
    added_structure
  )

  # The names `f` and `data` show up in the printed model call, so they are
  # kept as they are.
  f <- reformulate(rhs, response = sprintf("scale(%s)", response))
  lmer(f, data = data)
}

#' Fit the main mixed model using the pre-scaled columns.
#'
#' Uses the `scaled_*` columns directly instead of calling `scale()` inside
#' the formula; the response is `scaled_response`.
#'
#' @param sim Name of the (unscaled) similarity column; "scaled_" is
#'   prepended to find the column to use.
#' @param data Data frame containing the pre-scaled columns.
#' @param pruned_model If `TRUE`, subjects get a random intercept only;
#'   otherwise they also get a random slope for the similarity measure.
#' @return The fitted `lmer()` model.
fit_main_scaled <- function(sim, data = model_data_scaled, pruned_model = FALSE) {
  sim_col <- paste0("scaled_", sim)

  subject_term <- if (pruned_model) {
    "(1 | subject_id)"
  } else {
    paste0("(1 + ", sim_col, " | subject_id)")
  }

  terms <- c(
    paste0(sim_col, " * scaled_age"),
    "scaled_aoa",
    "scaled_saliency_diff",
    "(1 | original_target_label:img_key)",
    "(1 | dataset_id)",
    subject_term
  )

  f <- reformulate(terms, response = "scaled_response")
  lmer(f, data = data)
}

# One model per similarity measure, named by measure.
mods <- setNames(lapply(sims, fit_main), sims)
mods_scaled <- setNames(lapply(sims, fit_main_scaled), sims)
# Convergence messages recorded by lme4 for each model (NULL when none).
lapply(mods, \(m) m@optinfo$conv$lme4$messages)
$image_similarity
NULL

$text_similarity
[1] "boundary (singular) fit: see help('isSingular')"

$multimodal_similarity
NULL

$ooo_similarity
NULL
# Full summary of each fitted model.
lapply(mods, summary)
$image_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 58356.9

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3201 -0.6271  0.1156  0.7219  2.4420 

Random effects:
 Groups                        Name                    Variance Std.Dev. Corr 
 subject_id                    (Intercept)             0.044743 0.21152       
                               scale(image_similarity) 0.002014 0.04488  -0.18
 original_target_label:img_key (Intercept)             0.051918 0.22786       
 dataset_id                    (Intercept)             0.022916 0.15138       
 Residual                                              0.850784 0.92238       
Number of obs: 21312, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                     Estimate Std. Error         df t value
(Intercept)                        -5.716e-03  3.588e-02  2.214e+01  -0.159
scale(image_similarity)            -3.231e-02  1.741e-02  4.442e+02  -1.856
scale(age)                          2.106e-01  1.513e-02  8.437e+02  13.920
scale(aoa)                          1.767e-03  1.189e-02  7.046e+02   0.149
scale(MeanSaliencyDiff)             6.220e-03  1.202e-02  4.642e+02   0.517
scale(image_similarity):scale(age) -6.794e-03  1.096e-02  2.275e+03  -0.620
                                   Pr(>|t|)    
(Intercept)                          0.8749    
scale(image_similarity)              0.0641 .  
scale(age)                           <2e-16 ***
scale(aoa)                           0.8819    
scale(MeanSaliencyDiff)              0.6052    
scale(image_similarity):scale(age)   0.5354    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(mg_sml) -0.058                            
scale(age)  -0.099 -0.044                     
scale(aoa)  -0.077 -0.079 -0.045              
scl(MnSlnD)  0.000  0.083  0.023  0.069       
scl(mg_):() -0.033  0.064 -0.292  0.027 -0.051

$text_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 58359.1

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3210 -0.6265  0.1140  0.7246  2.4044 

Random effects:
 Groups                        Name                   Variance  Std.Dev. Corr 
 subject_id                    (Intercept)            0.0444983 0.2109        
                               scale(text_similarity) 0.0004709 0.0217   -1.00
 original_target_label:img_key (Intercept)            0.0526953 0.2296        
 dataset_id                    (Intercept)            0.0233636 0.1529        
 Residual                                             0.8511638 0.9226        
Number of obs: 21312, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                    Estimate Std. Error         df t value
(Intercept)                       -8.421e-03  3.666e-02  2.253e+01  -0.230
scale(text_similarity)             9.436e-04  1.726e-02  5.228e+02   0.055
scale(age)                         2.095e-01  1.506e-02  8.929e+02  13.915
scale(aoa)                         1.114e-03  1.217e-02  6.855e+02   0.092
scale(MeanSaliencyDiff)            7.590e-03  1.202e-02  4.681e+02   0.632
scale(text_similarity):scale(age)  5.362e-03  1.092e-02  3.014e+03   0.491
                                  Pr(>|t|)    
(Intercept)                          0.820    
scale(text_similarity)               0.956    
scale(age)                          <2e-16 ***
scale(aoa)                           0.927    
scale(MeanSaliencyDiff)              0.528    
scale(text_similarity):scale(age)    0.624    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(txt_sm)  0.148                            
scale(age)  -0.088  0.015                     
scale(aoa)  -0.042  0.188 -0.017              
scl(MnSlnD)  0.010  0.032  0.018  0.083       
scl(tx_):()  0.058 -0.110  0.251  0.051  0.023
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')


$multimodal_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 58357.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3228 -0.6271  0.1141  0.7202  2.4608 

Random effects:
 Groups                        Name                         Variance Std.Dev.
 subject_id                    (Intercept)                  0.044987 0.21210 
                               scale(multimodal_similarity) 0.003344 0.05782 
 original_target_label:img_key (Intercept)                  0.051844 0.22769 
 dataset_id                    (Intercept)                  0.022924 0.15141 
 Residual                                                   0.848875 0.92134 
 Corr 
      
 -0.11
      
      
      
Number of obs: 21312, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                          Estimate Std. Error         df
(Intercept)                             -1.128e-02  3.582e-02  2.231e+01
scale(multimodal_similarity)            -1.815e-02  1.346e-02  4.284e+02
scale(age)                               2.071e-01  1.458e-02  7.226e+02
scale(aoa)                              -2.018e-03  1.202e-02  7.084e+02
scale(MeanSaliencyDiff)                  7.140e-03  1.196e-02  4.698e+02
scale(multimodal_similarity):scale(age)  1.628e-03  9.464e-03  2.227e+03
                                        t value Pr(>|t|)    
(Intercept)                              -0.315    0.756    
scale(multimodal_similarity)             -1.348    0.178    
scale(age)                               14.207   <2e-16 ***
scale(aoa)                               -0.168    0.867    
scale(MeanSaliencyDiff)                   0.597    0.551    
scale(multimodal_similarity):scale(age)   0.172    0.863    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(mltmd_)  0.022                            
scale(age)  -0.113  0.005                     
scale(aoa)  -0.075  0.150 -0.037              
scl(MnSlnD)  0.003  0.024  0.008  0.080       
scl(ml_):()  0.015 -0.036  0.112  0.016 -0.027

$ooo_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 55456.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3359 -0.6230  0.1168  0.7207  2.4523 

Random effects:
 Groups                        Name                  Variance Std.Dev. Corr 
 subject_id                    (Intercept)           0.046212 0.21497       
                               scale(ooo_similarity) 0.005711 0.07557  -0.20
 original_target_label:img_key (Intercept)           0.048499 0.22022       
 dataset_id                    (Intercept)           0.020838 0.14435       
 Residual                                            0.840619 0.91685       
Number of obs: 20316, groups:  
subject_id, 1356; original_target_label:img_key, 795; dataset_id, 24

Fixed effects:
                                   Estimate Std. Error         df t value
(Intercept)                      -1.404e-02  3.498e-02  2.194e+01  -0.401
scale(ooo_similarity)            -2.645e-02  1.704e-02  3.537e+02  -1.552
scale(age)                        2.139e-01  1.492e-02  5.606e+02  14.334
scale(aoa)                       -3.091e-03  1.229e-02  6.394e+02  -0.252
scale(MeanSaliencyDiff)           7.709e-03  1.223e-02  4.242e+02   0.630
scale(ooo_similarity):scale(age)  1.665e-02  9.836e-03  1.769e+03   1.693
                                 Pr(>|t|)    
(Intercept)                        0.6921    
scale(ooo_similarity)              0.1215    
scale(age)                         <2e-16 ***
scale(aoa)                         0.8015    
scale(MeanSaliencyDiff)            0.5289    
scale(ooo_similarity):scale(age)   0.0907 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(_smlrt)  0.103                            
scale(age)  -0.125 -0.028                     
scale(aoa)  -0.101 -0.079 -0.040              
scl(MnSlnD)  0.015  0.064  0.006  0.070       
scl(_sm):() -0.038 -0.127  0.196  0.026 -0.030
# Marginal (R2m) and conditional (R2c) R-squared for the image-similarity model.
r.squaredGLMM(mods$image_similarity)
            R2m       R2c
[1,] 0.04098928 0.1610329

Trying to see why the text similarity model was singular (in the fits above, only the text similarity model reports a singular fit; the multimodal model does not).

# Every column that enters the models: the four similarity measures, the
# response, and the covariates.
vars <- c(
  sims,
  "mean_target_looking_critical_window",
  "age",
  "aoa",
  "MeanSaliencyDiff"
)

# Number of missing values per model column.
colSums(is.na(model_data[vars]))
                   image_similarity                     text_similarity 
                                  0                                   0 
              multimodal_similarity                      ooo_similarity 
                                  0                                1118 
mean_target_looking_critical_window                                 age 
                                  0                                 131 
                                aoa                    MeanSaliencyDiff 
                               1260                                   0 
# total rows, and rows complete across all vars 
nrow(model_data)
[1] 22703
sum(complete.cases(model_data[vars]))
[1] 20316
# rows complete without requiring ooo
sum(complete.cases(model_data[setdiff(vars, "ooo_similarity")]))
[1] 21312
library(ggeffects)
# Mean and SD of the raw response, used to undo the scale() applied to the
# response in the model formula.
mu <- mean(model_data$mean_target_looking_critical_window, na.rm = TRUE)
sig <- sd(model_data$mean_target_looking_critical_window, na.rm = TRUE)

# Predictions over each model's own similarity term, tagged with a readable
# name ("image similarity", ...) to facet on.
all_predictions <- lapply(sims, \(sim) {
  pred <- ggpredict(mods[[sim]], terms = sim)
  pred$sim_type <- gsub("_", " ", sim)
  pred
})

combined_main_preds <- do.call(rbind, all_predictions)

# Back-transform predictions and interval bounds to the raw response scale.
combined_main_preds$predicted_raw <- mu + sig * combined_main_preds$predicted
combined_main_preds$conf.low_raw <- mu + sig * combined_main_preds$conf.low
combined_main_preds$conf.high_raw <- mu + sig * combined_main_preds$conf.high

# Predicted looking (raw scale) against similarity, one panel per measure:
# interval ribbon, prediction line, and a dashed reference line at 0.5.
ggplot(combined_main_preds, aes(x, predicted_raw)) +
  geom_ribbon(
    aes(ymin = conf.low_raw, ymax = conf.high_raw),
    fill = "steelblue",
    alpha = 0.15
  ) +
  geom_line(color = "steelblue", linewidth = 1.2) +
  geom_hline(yintercept = 0.5, linetype = "dashed", alpha = 0.5) +
  # Each measure lives on its own scale, so x is freed per panel.
  facet_wrap(~ sim_type, scales = "free_x") +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(face = "bold", size = 11)
  ) +
  labs(
    title = "Main similarity predictors",
    subtitle = "Critical Window Predictions",
    x = "Target-distractor similarity",
    y = "Predicted Proportion Target Looking"
  )

getting rid of singular effects

getting rid of singular effects

# Text-similarity model refit with only a random intercept for subject_id
# (no by-subject similarity slope). The fixed effects and the other random
# intercepts are the same terms that fit_main() uses.
pruned_text_model <- lmer(scale(mean_target_looking_critical_window) ~ scale(text_similarity)*scale(age)
                    + scale(aoa) + scale(MeanSaliencyDiff)
                    + (1 | subject_id)
                    + (1 | original_target_label:img_key)
                    + (1 | dataset_id), data = model_data)

# Print the fit to compare against the full text-similarity model above.
summary(pruned_text_model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: scale(mean_target_looking_critical_window) ~ scale(text_similarity) *  
    scale(age) + scale(aoa) + scale(MeanSaliencyDiff) + (1 |  
    subject_id) + (1 | original_target_label:img_key) + (1 |      dataset_id)
   Data: model_data

REML criterion at convergence: 58365

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3184 -0.6273  0.1148  0.7229  2.4557 

Random effects:
 Groups                        Name        Variance Std.Dev.
 subject_id                    (Intercept) 0.04517  0.2125  
 original_target_label:img_key (Intercept) 0.05270  0.2296  
 dataset_id                    (Intercept) 0.02383  0.1544  
 Residual                                  0.85203  0.9231  
Number of obs: 21312, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                    Estimate Std. Error         df t value
(Intercept)                       -7.990e-03  3.693e-02  2.283e+01  -0.216
scale(text_similarity)             1.656e-03  1.715e-02  5.306e+02   0.097
scale(age)                         2.089e-01  1.509e-02  9.302e+02  13.842
scale(aoa)                         6.770e-04  1.214e-02  6.870e+02   0.056
scale(MeanSaliencyDiff)            7.405e-03  1.201e-02  4.684e+02   0.616
scale(text_similarity):scale(age)  4.792e-03  1.088e-02  3.133e+03   0.441
                                  Pr(>|t|)    
(Intercept)                          0.831    
scale(text_similarity)               0.923    
scale(age)                          <2e-16 ***
scale(aoa)                           0.956    
scale(MeanSaliencyDiff)              0.538    
scale(text_similarity):scale(age)    0.660    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(txt_sm)  0.158                            
scale(age)  -0.086  0.011                     
scale(aoa)  -0.042  0.189 -0.016              
scl(MnSlnD)  0.010  0.031  0.018  0.084       
scl(tx_):()  0.053 -0.105  0.278  0.051  0.024

model comparison

Effects persist in the pruned models. Now trying to compare to see which one has the best fit.

# Complete-case subset of model_data: drop any row with a missing value in one
# of the four similarity measures, the outcome, or a covariate, so every
# comparison model can be fit to exactly the same rows.
dat_cc <- tidyr::drop_na(
  model_data,
  all_of(c(
    "image_similarity",
    "text_similarity",
    "multimodal_similarity",
    "ooo_similarity",
    "mean_target_looking_critical_window",
    "age",
    "aoa",
    "MeanSaliencyDiff"
  ))
)

# Fit the shared-structure comparison model for one similarity measure.
#
# Args:
#   sim:  Name (string) of the similarity column used as the focal predictor;
#         it enters the model as scale(<sim>) * scale(age).
#   data: Data frame to fit on. Defaults to the complete-case set `dat_cc`.
#
# Returns:
#   The lmer fit, estimated with maximum likelihood (REML = FALSE).
fit_common <- function(sim, data = dat_cc) {
  fixed_terms <- c(
    sprintf("scale(%s)*scale(age)", sim),
    "scale(aoa)",
    "scale(MeanSaliencyDiff)"
  )
  random_terms <- c(
    "(1 | subject_id)",
    "(1 | original_target_label:img_key)",
    "(1 | dataset_id)"
  )
  f <- reformulate(
    c(fixed_terms, random_terms),
    response = "scale(mean_target_looking_critical_window)"
  )
  # Fit on the `data` argument. The global `dat_cc` used to be hard-coded here,
  # so a data frame supplied by the caller was silently ignored.
  lmer(f, data = data, REML = FALSE)
}

# One comparison model per similarity measure, in a list keyed by measure name.
mods_common <- setNames(lapply(sims, fit_common), sims)

# Build and print a model-selection table comparing the four similarity models.
# NOTE(review): the models are passed as explicit `mods_common$...` expressions;
# these appear to be what model.sel uses as row labels in the printed table, so
# passing them differently would likely change the labels.
sel <- model.sel(mods_common$image_similarity, mods_common$text_similarity,
                 mods_common$multimodal_similarity, mods_common$ooo_similarity)
sel
Model selection table 
                                     (Int) scl(age)  scl(aoa) scl(img_sml)
mods_common$ooo_similarity        -0.02318   0.2185 -0.003486             
mods_common$image_similarity      -0.01094   0.2172 -0.003830     -0.03736
mods_common$multimodal_similarity -0.01786   0.2139 -0.007671             
mods_common$text_similarity       -0.01382   0.2153 -0.004190             
                                  scl(MSD) scl(age):scl(img_sml) scl(txt_sml)
mods_common$ooo_similarity        0.007068                                   
mods_common$image_similarity      0.007010             -0.006103             
mods_common$multimodal_similarity 0.008287                                   
mods_common$text_similarity       0.008957                           0.005492
                                  scl(age):scl(txt_sml) scl(mlt_sml)
mods_common$ooo_similarity                                          
mods_common$image_similarity                                        
mods_common$multimodal_similarity                           -0.01998
mods_common$text_similarity                    0.004236             
                                  scl(age):scl(mlt_sml) scl(ooo_sml)
mods_common$ooo_similarity                                  -0.02563
mods_common$image_similarity                                        
mods_common$multimodal_similarity              0.003695             
mods_common$text_similarity                                         
                                  scl(age):scl(ooo_sml) df    logLik    AICc
mods_common$ooo_similarity                      0.01669 10 -27807.11 55634.2
mods_common$image_similarity                            10 -27807.25 55634.5
mods_common$multimodal_similarity                       10 -27808.33 55636.7
mods_common$text_similarity                             10 -27809.32 55638.7
                                  delta weight
mods_common$ooo_similarity         0.00  0.441
mods_common$image_similarity       0.29  0.382
mods_common$multimodal_similarity  2.45  0.130
mods_common$text_similarity        4.43  0.048
Models ranked by AICc(x) 
Random terms (all models): 
  1 | subject_id, 1 | original_target_label:img_key, 1 | dataset_id
# Inspect the image-similarity comparison model in detail.
summary(mods_common[["image_similarity"]])
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: f
   Data: dat_cc

      AIC       BIC    logLik -2*log(L)  df.resid 
  55634.5   55713.7  -27807.3   55614.5     20306 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3407 -0.6238  0.1154  0.7224  2.4765 

Random effects:
 Groups                        Name        Variance Std.Dev.
 subject_id                    (Intercept) 0.04875  0.2208  
 original_target_label:img_key (Intercept) 0.04813  0.2194  
 dataset_id                    (Intercept) 0.01917  0.1385  
 Residual                                  0.85235  0.9232  
Number of obs: 20316, groups:  
subject_id, 1356; original_target_label:img_key, 795; dataset_id, 24

Fixed effects:
                                     Estimate Std. Error         df t value
(Intercept)                        -1.094e-02  3.380e-02  2.310e+01  -0.324
scale(image_similarity)            -3.736e-02  1.795e-02  4.085e+02  -2.082
scale(age)                          2.172e-01  1.547e-02  7.556e+02  14.035
scale(aoa)                         -3.830e-03  1.217e-02  6.345e+02  -0.315
scale(MeanSaliencyDiff)             7.010e-03  1.234e-02  4.252e+02   0.568
scale(image_similarity):scale(age) -6.103e-03  1.164e-02  2.163e+03  -0.525
                                   Pr(>|t|)    
(Intercept)                           0.749    
scale(image_similarity)               0.038 *  
scale(age)                           <2e-16 ***
scale(aoa)                            0.753    
scale(MeanSaliencyDiff)               0.570    
scale(image_similarity):scale(age)    0.600    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(mg_sml) -0.080                            
scale(age)  -0.091 -0.060                     
scale(aoa)  -0.094 -0.064 -0.054              
scl(MnSlnD)  0.006  0.082  0.026  0.067       
scl(mg_):() -0.047  0.088 -0.312  0.033 -0.051

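The OOO similarity model has the lowest AICc, with the image similarity model essentially tied (ΔAICc = 0.29); both fit better than the multimodal (ΔAICc = 2.45) and text (ΔAICc = 4.43) similarity models.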

including non-vanilla trials

# Use all usable trials (vanilla and non-vanilla) and re-code vanilla_trial as
# a +/-0.5 contrast: 1 -> 0.5, any other non-missing value -> -0.5.
model_data_all <- mutate(
  usable_trials_summarized_with_sims,
  vanilla_trial = ifelse(vanilla_trial == 1, 0.5, -0.5)
)

# Fit one model per similarity measure with the trial-type covariate added;
# the resulting list is keyed by measure name.
mods_all <- setNames(
  lapply(
    sims, fit_main, model_data_all,
    added_structure = "scale(vanilla_trial)"
  ),
  sims
)

# Convergence messages recorded by lme4 for each fit (NULL when none).
lapply(mods_all, \(fit) fit@optinfo$conv$lme4$messages)
$image_similarity
NULL

$text_similarity
NULL

$multimodal_similarity
NULL

$ooo_similarity
NULL
# Print the full summary of every all-trials model.
lapply(mods_all, summary)
$image_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 73188.1

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.4171 -0.6169  0.1141  0.7050  2.5167 

Random effects:
 Groups                        Name                    Variance Std.Dev. Corr 
 subject_id                    (Intercept)             0.043822 0.20934       
                               scale(image_similarity) 0.002493 0.04993  -0.15
 original_target_label:img_key (Intercept)             0.069699 0.26401       
 dataset_id                    (Intercept)             0.030970 0.17598       
 Residual                                              0.811356 0.90075       
Number of obs: 27109, groups:  
subject_id, 1579; original_target_label:img_key, 1097; dataset_id, 26

Fixed effects:
                                     Estimate Std. Error         df t value
(Intercept)                         5.764e-02  3.810e-02  2.380e+01   1.513
scale(image_similarity)            -4.204e-02  1.713e-02  7.114e+02  -2.454
scale(age)                          2.203e-01  1.539e-02  1.332e+03  14.311
scale(aoa)                          1.295e-03  1.139e-02  8.538e+02   0.114
scale(MeanSaliencyDiff)             2.110e-02  1.123e-02  7.200e+02   1.879
scale(vanilla_trial)                3.172e-03  1.433e-02  1.427e+03   0.221
scale(image_similarity):scale(age) -2.688e-02  1.055e-02  2.580e+03  -2.549
                                   Pr(>|t|)    
(Intercept)                          0.1435    
scale(image_similarity)              0.0144 *  
scale(age)                           <2e-16 ***
scale(aoa)                           0.9095    
scale(MeanSaliencyDiff)              0.0606 .  
scale(vanilla_trial)                 0.8248    
scale(image_similarity):scale(age)   0.0109 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(m_) scl(g) scal() s(MSD) scl(v_)
scl(mg_sml)  0.016                                     
scale(age)   0.024 -0.091                              
scale(aoa)  -0.054 -0.044  -0.025                      
scl(MnSlnD)  0.000  0.063   0.035  0.000               
scl(vnll_t)  0.030  0.112   0.022 -0.050  0.005        
scl(mg_):() -0.047  0.113  -0.328  0.034 -0.056  0.032 

$text_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 71722.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3978 -0.6188  0.1182  0.7065  2.4744 

Random effects:
 Groups                        Name                   Variance  Std.Dev. Corr 
 subject_id                    (Intercept)            0.0443166 0.21051       
                               scale(text_similarity) 0.0004793 0.02189  -0.85
 original_target_label:img_key (Intercept)            0.0627652 0.25053       
 dataset_id                    (Intercept)            0.0321686 0.17936       
 Residual                                             0.8177373 0.90429       
Number of obs: 26522, groups:  
subject_id, 1579; original_target_label:img_key, 1090; dataset_id, 26

Fixed effects:
                                    Estimate Std. Error         df t value
(Intercept)                        6.189e-02  3.868e-02  2.350e+01   1.600
scale(text_similarity)             2.860e-03  1.715e-02  6.727e+02   0.167
scale(age)                         2.069e-01  1.510e-02  1.347e+03  13.707
scale(aoa)                         7.681e-04  1.135e-02  7.922e+02   0.068
scale(MeanSaliencyDiff)            2.164e-02  1.085e-02  6.743e+02   1.993
scale(vanilla_trial)              -7.525e-03  1.445e-02  1.373e+03  -0.521
scale(text_similarity):scale(age) -6.710e-03  1.200e-02  3.204e+03  -0.559
                                  Pr(>|t|)    
(Intercept)                         0.1230    
scale(text_similarity)              0.8676    
scale(age)                          <2e-16 ***
scale(aoa)                          0.9461    
scale(MeanSaliencyDiff)             0.0466 *  
scale(vanilla_trial)                0.6026    
scale(text_similarity):scale(age)   0.5761    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(t_) scl(g) scal() s(MSD) scl(v_)
scl(txt_sm)  0.077                                     
scale(age)   0.013 -0.013                              
scale(aoa)  -0.030  0.197  -0.016                      
scl(MnSlnD) -0.001  0.030   0.023  0.011               
scl(vnll_t)  0.037  0.128   0.047 -0.010  0.005        
scl(tx_):()  0.023 -0.123   0.070  0.018  0.013  0.115 

$multimodal_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 73187

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.4146 -0.6167  0.1124  0.7055  2.5370 

Random effects:
 Groups                        Name                         Variance Std.Dev.
 subject_id                    (Intercept)                  0.043531 0.20864 
                               scale(multimodal_similarity) 0.003853 0.06207 
 original_target_label:img_key (Intercept)                  0.068580 0.26188 
 dataset_id                    (Intercept)                  0.032965 0.18156 
 Residual                                                   0.810517 0.90029 
 Corr 
      
 -0.11
      
      
      
Number of obs: 27109, groups:  
subject_id, 1579; original_target_label:img_key, 1097; dataset_id, 26

Fixed effects:
                                          Estimate Std. Error         df
(Intercept)                              4.357e-02  3.928e-02  2.492e+01
scale(multimodal_similarity)            -4.470e-02  1.576e-02  7.191e+02
scale(age)                               2.026e-01  1.458e-02  1.416e+03
scale(aoa)                              -3.613e-03  1.142e-02  8.592e+02
scale(MeanSaliencyDiff)                  2.074e-02  1.112e-02  7.210e+02
scale(vanilla_trial)                    -7.697e-03  1.493e-02  1.639e+03
scale(multimodal_similarity):scale(age) -2.414e-02  1.076e-02  3.289e+03
                                        t value Pr(>|t|)    
(Intercept)                               1.109  0.27799    
scale(multimodal_similarity)             -2.836  0.00469 ** 
scale(age)                               13.897  < 2e-16 ***
scale(aoa)                               -0.316  0.75183    
scale(MeanSaliencyDiff)                   1.865  0.06266 .  
scale(vanilla_trial)                     -0.515  0.60632    
scale(multimodal_similarity):scale(age)  -2.244  0.02488 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(m_) scl(g) scal() s(MSD) scl(v_)
scl(mltmd_)  0.111                                     
scale(age)   0.006 -0.027                              
scale(aoa)  -0.038  0.102  -0.014                      
scl(MnSlnD) -0.002  0.020   0.020  0.006               
scl(vnll_t)  0.056  0.273   0.042 -0.007  0.003        
scl(ml_):() -0.013  0.023   0.086  0.060 -0.014  0.140 

$ooo_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 62585.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3561 -0.6192  0.1124  0.7158  2.4673 

Random effects:
 Groups                        Name                  Variance Std.Dev. Corr 
 subject_id                    (Intercept)           0.045157 0.21250       
                               scale(ooo_similarity) 0.005855 0.07652  -0.14
 original_target_label:img_key (Intercept)           0.057803 0.24042       
 dataset_id                    (Intercept)           0.021789 0.14761       
 Residual                                            0.833943 0.91320       
Number of obs: 22966, groups:  
subject_id, 1544; original_target_label:img_key, 881; dataset_id, 26

Fixed effects:
                                   Estimate Std. Error         df t value
(Intercept)                       4.483e-02  3.422e-02  2.191e+01   1.310
scale(ooo_similarity)            -2.366e-02  1.704e-02  3.720e+02  -1.388
scale(age)                        2.201e-01  1.575e-02  7.027e+02  13.976
scale(aoa)                        1.931e-03  1.440e-02  6.728e+02   0.134
scale(MeanSaliencyDiff)           1.109e-02  1.243e-02  4.581e+02   0.892
scale(vanilla_trial)              1.976e-03  1.791e-02  1.306e+03   0.110
scale(ooo_similarity):scale(age)  2.539e-03  1.060e-02  2.032e+03   0.240
                                 Pr(>|t|)    
(Intercept)                         0.204    
scale(ooo_similarity)               0.166    
scale(age)                         <2e-16 ***
scale(aoa)                          0.893    
scale(MeanSaliencyDiff)             0.373    
scale(vanilla_trial)                0.912    
scale(ooo_similarity):scale(age)    0.811    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_s) scl(g) scal() s(MSD) scl(v_)
scl(_smlrt)  0.109                                     
scale(age)   0.010 -0.042                              
scale(aoa)  -0.070 -0.069  -0.033                      
scl(MnSlnD)  0.019  0.048   0.014  0.058               
scl(vnll_t) -0.106  0.087   0.016 -0.078 -0.029        
scl(_sm):() -0.039  0.005   0.121  0.028 -0.019  0.061 

Multimodal similarity and image similarity are the only significant ones here, so we prune the model next.

# Pruned multimodal-similarity model on all usable trials (vanilla and
# non-vanilla), with random intercepts for subject and dataset only.
# NOTE(review): unlike the other pruned models in this report, this one has no
# (1 | original_target_label:img_key) item intercept and no vanilla_trial
# covariate -- confirm that this is intentional, since omitting the item
# intercept can make the fixed-effect standard errors smaller.
pruned_multimodal_model_all <- lmer(
  scale(mean_target_looking_critical_window) ~
    scale(multimodal_similarity) * scale(age) +
    scale(aoa) +
    scale(MeanSaliencyDiff) +
    (1 | subject_id) +
    (1 | dataset_id),
  data = usable_trials_summarized_with_sims
)
summary(pruned_multimodal_model_all)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(mean_target_looking_critical_window) ~ scale(multimodal_similarity) *  
    scale(age) + scale(aoa) + scale(MeanSaliencyDiff) + (1 |  
    subject_id) + (1 | dataset_id)
   Data: usable_trials_summarized_with_sims

REML criterion at convergence: 74160.8

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.0210 -0.6286  0.1237  0.7363  2.1261 

Random effects:
 Groups     Name        Variance Std.Dev.
 subject_id (Intercept) 0.04349  0.2085  
 dataset_id (Intercept) 0.03377  0.1838  
 Residual               0.87136  0.9335  
Number of obs: 27109, groups:  subject_id, 1579; dataset_id, 26

Fixed effects:
                                          Estimate Std. Error         df
(Intercept)                              3.696e-02  3.785e-02  2.418e+01
scale(multimodal_similarity)            -4.924e-02  7.308e-03  2.633e+04
scale(age)                               1.968e-01  1.444e-02  1.343e+03
scale(aoa)                              -4.212e-03  6.560e-03  2.451e+04
scale(MeanSaliencyDiff)                  2.719e-02  5.874e-03  2.684e+04
scale(multimodal_similarity):scale(age) -2.884e-02  7.327e-03  2.602e+04
                                        t value Pr(>|t|)    
(Intercept)                               0.976    0.339    
scale(multimodal_similarity)             -6.738 1.64e-11 ***
scale(age)                               13.624  < 2e-16 ***
scale(aoa)                               -0.642    0.521    
scale(MeanSaliencyDiff)                   4.629 3.69e-06 ***
scale(multimodal_similarity):scale(age)  -3.935 8.33e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(mltmd_)  0.046                            
scale(age)   0.000 -0.052                     
scale(aoa)  -0.023  0.063 -0.006              
scl(MnSlnD)  0.004  0.016  0.014 -0.041       
scl(ml_):() -0.014  0.002  0.052  0.100 -0.071

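The effect persists in the pruned model, as does its interaction with age, with random intercepts for subject and dataset (note that this pruned model omits the item-level random intercept used in the full models, which likely contributes to the smaller standard errors). This feels like a really interesting effect, especially given that the other models across vanilla and non-vanilla trials are not similarly predictive.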

including baseline window as covariate

# Keep only trials whose min_time is at or below -500; per the observation
# counts in the model output this removes 5 individual trials.
baseline_data <- filter(model_data, min_time <= -500)

# Refit each similarity model (pruned structure) with baseline-window target
# looking added as a covariate; the list is keyed by measure name.
mods_baseline_covariate <- setNames(
  lapply(
    sims, fit_main,
    data = baseline_data,
    added_structure = "scale(mean_target_looking_baseline_window)",
    pruned_model = TRUE
  ),
  sims
)
lapply(mods_baseline_covariate, summary)
$image_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 58111.8

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2500 -0.6312  0.1191  0.7318  2.5275 

Random effects:
 Groups                        Name        Variance Std.Dev.
 subject_id                    (Intercept) 0.04618  0.2149  
 original_target_label:img_key (Intercept) 0.04152  0.2038  
 dataset_id                    (Intercept) 0.02398  0.1548  
 Residual                                  0.84431  0.9189  
Number of obs: 21307, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                             Estimate Std. Error         df
(Intercept)                                -6.207e-03  3.611e-02  2.216e+01
scale(image_similarity)                    -3.260e-02  1.615e-02  4.290e+02
scale(age)                                  2.088e-01  1.506e-02  8.574e+02
scale(aoa)                                  1.208e-03  1.130e-02  6.911e+02
scale(MeanSaliencyDiff)                     4.249e-03  1.127e-02  4.460e+02
scale(mean_target_looking_baseline_window)  1.033e-01  6.550e-03  2.085e+04
scale(image_similarity):scale(age)         -7.285e-03  1.075e-02  2.624e+03
                                           t value Pr(>|t|)    
(Intercept)                                 -0.172   0.8651    
scale(image_similarity)                     -2.018   0.0442 *  
scale(age)                                  13.863   <2e-16 ***
scale(aoa)                                   0.107   0.9149    
scale(MeanSaliencyDiff)                      0.377   0.7064    
scale(mean_target_looking_baseline_window)  15.764   <2e-16 ***
scale(image_similarity):scale(age)          -0.678   0.4980    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD) s(____
scl(mg_sml) -0.049                                   
scale(age)  -0.102 -0.046                            
scale(aoa)  -0.073 -0.078 -0.043                     
scl(MnSlnD)  0.001  0.084  0.022  0.068              
scl(mn____) -0.002 -0.002 -0.007  0.000 -0.017       
scl(mg_):() -0.033  0.078 -0.282  0.026 -0.053 -0.001

$text_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 58115.9

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2423 -0.6312  0.1179  0.7338  2.5262 

Random effects:
 Groups                        Name        Variance Std.Dev.
 subject_id                    (Intercept) 0.04611  0.2147  
 original_target_label:img_key (Intercept) 0.04238  0.2059  
 dataset_id                    (Intercept) 0.02494  0.1579  
 Residual                                  0.84424  0.9188  
Number of obs: 21307, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                             Estimate Std. Error         df
(Intercept)                                -8.100e-03  3.715e-02  2.282e+01
scale(text_similarity)                      3.128e-03  1.628e-02  5.257e+02
scale(age)                                  2.069e-01  1.506e-02  9.285e+02
scale(aoa)                                  4.049e-04  1.154e-02  6.700e+02
scale(MeanSaliencyDiff)                     5.867e-03  1.129e-02  4.497e+02
scale(mean_target_looking_baseline_window)  1.031e-01  6.551e-03  2.085e+04
scale(text_similarity):scale(age)           4.337e-03  1.063e-02  2.813e+03
                                           t value Pr(>|t|)    
(Intercept)                                 -0.218    0.829    
scale(text_similarity)                       0.192    0.848    
scale(age)                                  13.734   <2e-16 ***
scale(aoa)                                   0.035    0.972    
scale(MeanSaliencyDiff)                      0.520    0.603    
scale(mean_target_looking_baseline_window)  15.736   <2e-16 ***
scale(text_similarity):scale(age)            0.408    0.683    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD) s(____
scl(txt_sm)  0.149                                   
scale(age)  -0.091  0.005                            
scale(aoa)  -0.041  0.180 -0.015                     
scl(MnSlnD)  0.010  0.030  0.017  0.083              
scl(mn____) -0.003  0.000 -0.009  0.000 -0.017       
scl(tx_):()  0.049 -0.120  0.272  0.051  0.023 -0.005

$multimodal_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 58115.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2413 -0.6308  0.1185  0.7328  2.5241 

Random effects:
 Groups                        Name        Variance Std.Dev.
 subject_id                    (Intercept) 0.04613  0.2148  
 original_target_label:img_key (Intercept) 0.04197  0.2049  
 dataset_id                    (Intercept) 0.02408  0.1552  
 Residual                                  0.84431  0.9189  
Number of obs: 21307, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                             Estimate Std. Error         df
(Intercept)                                -1.160e-02  3.614e-02  2.235e+01
scale(multimodal_similarity)               -1.688e-02  1.240e-02  4.063e+02
scale(age)                                  2.050e-01  1.457e-02  7.379e+02
scale(aoa)                                 -2.726e-03  1.142e-02  6.888e+02
scale(MeanSaliencyDiff)                     5.362e-03  1.125e-02  4.513e+02
scale(mean_target_looking_baseline_window)  1.031e-01  6.551e-03  2.085e+04
scale(multimodal_similarity):scale(age)    -3.357e-04  9.045e-03  2.767e+03
                                           t value Pr(>|t|)    
(Intercept)                                 -0.321    0.751    
scale(multimodal_similarity)                -1.362    0.174    
scale(age)                                  14.072   <2e-16 ***
scale(aoa)                                  -0.239    0.811    
scale(MeanSaliencyDiff)                      0.476    0.634    
scale(mean_target_looking_baseline_window)  15.738   <2e-16 ***
scale(multimodal_similarity):scale(age)     -0.037    0.970    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD) s(____
scl(mltmd_)  0.024                                   
scale(age)  -0.114  0.004                            
scale(aoa)  -0.071  0.151 -0.034                     
scl(MnSlnD)  0.004  0.026  0.007  0.079              
scl(mn____) -0.002  0.007 -0.009  0.001 -0.016       
scl(ml_):()  0.012 -0.031  0.126  0.018 -0.029 -0.012

$ooo_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 55255.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2374 -0.6265  0.1198  0.7296  2.5432 

Random effects:
 Groups                        Name        Variance Std.Dev.
 subject_id                    (Intercept) 0.04912  0.2216  
 original_target_label:img_key (Intercept) 0.03987  0.1997  
 dataset_id                    (Intercept) 0.02217  0.1489  
 Residual                                  0.83758  0.9152  
Number of obs: 20311, groups:  
subject_id, 1356; original_target_label:img_key, 795; dataset_id, 24

Fixed effects:
                                             Estimate Std. Error         df
(Intercept)                                -1.482e-02  3.540e-02  2.225e+01
scale(ooo_similarity)                      -2.564e-02  1.593e-02  3.838e+02
scale(age)                                  2.125e-01  1.485e-02  6.062e+02
scale(aoa)                                 -3.471e-03  1.172e-02  6.259e+02
scale(MeanSaliencyDiff)                     5.299e-03  1.154e-02  4.080e+02
scale(mean_target_looking_baseline_window)  9.605e-02  6.656e-03  1.984e+04
scale(ooo_similarity):scale(age)            1.632e-02  9.471e-03  2.306e+03
                                           t value Pr(>|t|)    
(Intercept)                                 -0.419    0.679    
scale(ooo_similarity)                       -1.610    0.108    
scale(age)                                  14.309   <2e-16 ***
scale(aoa)                                  -0.296    0.767    
scale(MeanSaliencyDiff)                      0.459    0.646    
scale(mean_target_looking_baseline_window)  14.430   <2e-16 ***
scale(ooo_similarity):scale(age)             1.723    0.085 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD) s(____
scl(_smlrt)  0.106                                   
scale(age)  -0.126 -0.031                            
scale(aoa)  -0.096 -0.079 -0.036                     
scl(MnSlnD)  0.015  0.070  0.005  0.068              
scl(mn____) -0.002  0.001 -0.008  0.003 -0.016       
scl(_sm):() -0.042 -0.128  0.222  0.028 -0.032 -0.003

predicting baseline corrected looking

# Refit each similarity model with baseline-corrected target looking as the
# response; the list is keyed by measure name.
mods_baseline_corrected <- setNames(
  lapply(
    sims, fit_main,
    data = baseline_data,
    response = "corrected_target_looking"
  ),
  sims
)

# Convergence messages recorded by lme4 for each fit (NULL when none).
lapply(mods_baseline_corrected, \(fit) fit@optinfo$conv$lme4$messages)
$image_similarity
NULL

$text_similarity
[1] "boundary (singular) fit: see help('isSingular')"

$multimodal_similarity
NULL

$ooo_similarity
[1] "boundary (singular) fit: see help('isSingular')"
# Print the full summary of every baseline-corrected model.
lapply(mods_baseline_corrected, summary)
$image_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 59514.2

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.11900 -0.63915 -0.02499  0.70133  2.46636 

Random effects:
 Groups                        Name                    Variance Std.Dev. Corr 
 subject_id                    (Intercept)             0.016818 0.12968       
                               scale(image_similarity) 0.001158 0.03403  -0.33
 original_target_label:img_key (Intercept)             0.007625 0.08732       
 dataset_id                    (Intercept)             0.012481 0.11172       
 Residual                                              0.932947 0.96589       
Number of obs: 21307, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                     Estimate Std. Error         df t value
(Intercept)                        -1.116e-02  2.629e-02  2.178e+01  -0.425
scale(image_similarity)            -3.007e-02  1.128e-02  2.490e+02  -2.665
scale(age)                          1.326e-01  1.358e-02  4.144e+02   9.764
scale(aoa)                         -2.360e-03  8.766e-03  7.300e+02  -0.269
scale(MeanSaliencyDiff)            -1.516e-02  8.168e-03  4.546e+02  -1.856
scale(image_similarity):scale(age) -4.733e-03  9.542e-03  1.087e+03  -0.496
                                   Pr(>|t|)    
(Intercept)                          0.6753    
scale(image_similarity)              0.0082 ** 
scale(age)                           <2e-16 ***
scale(aoa)                           0.7878    
scale(MeanSaliencyDiff)              0.0641 .  
scale(image_similarity):scale(age)   0.6200    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(mg_sml) -0.053                            
scale(age)  -0.133 -0.057                     
scale(aoa)  -0.072 -0.084 -0.053              
scl(MnSlnD)  0.005  0.089  0.025  0.053       
scl(mg_):() -0.037  0.118 -0.280  0.020 -0.063

$text_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 59522.9

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.11902 -0.63720 -0.02521  0.70026  2.45410 

Random effects:
 Groups                        Name                   Variance Std.Dev. Corr 
 subject_id                    (Intercept)            0.017710 0.13308       
                               scale(text_similarity) 0.000184 0.01356  -1.00
 original_target_label:img_key (Intercept)            0.008458 0.09197       
 dataset_id                    (Intercept)            0.012059 0.10981       
 Residual                                             0.932676 0.96575       
Number of obs: 21307, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                    Estimate Std. Error         df t value
(Intercept)                       -1.592e-02  2.632e-02  2.213e+01  -0.605
scale(text_similarity)            -3.126e-03  1.201e-02  4.584e+02  -0.260
scale(age)                         1.298e-01  1.355e-02  3.812e+02   9.581
scale(aoa)                        -4.342e-03  8.960e-03  7.155e+02  -0.485
scale(MeanSaliencyDiff)           -1.348e-02  8.229e-03  4.581e+02  -1.638
scale(text_similarity):scale(age)  3.062e-04  9.219e-03  1.108e+03   0.033
                                  Pr(>|t|)    
(Intercept)                          0.551    
scale(text_similarity)               0.795    
scale(age)                          <2e-16 ***
scale(aoa)                           0.628    
scale(MeanSaliencyDiff)              0.102    
scale(text_similarity):scale(age)    0.974    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(txt_sm)  0.137                            
scale(age)  -0.124 -0.004                     
scale(aoa)  -0.048  0.126 -0.024              
scl(MnSlnD)  0.011  0.021  0.015  0.068       
scl(tx_):()  0.047 -0.215  0.257  0.076  0.015
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')


$multimodal_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 59521.4

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.12403 -0.63665 -0.02594  0.70257  2.47284 

Random effects:
 Groups                        Name                         Variance  Std.Dev.
 subject_id                    (Intercept)                  0.0173206 0.13161 
                               scale(multimodal_similarity) 0.0009835 0.03136 
 original_target_label:img_key (Intercept)                  0.0084057 0.09168 
 dataset_id                    (Intercept)                  0.0122593 0.11072 
 Residual                                                   0.9323702 0.96559 
 Corr
     
 0.33
     
     
     
Number of obs: 21307, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                          Estimate Std. Error         df
(Intercept)                              -0.015687   0.026116  21.784115
scale(multimodal_similarity)             -0.005321   0.008845 315.218927
scale(age)                                0.127943   0.013106 341.197657
scale(aoa)                               -0.005354   0.008904 715.338540
scale(MeanSaliencyDiff)                  -0.013001   0.008233 462.491157
scale(multimodal_similarity):scale(age)  -0.010106   0.007859 978.700314
                                        t value Pr(>|t|)    
(Intercept)                              -0.601    0.554    
scale(multimodal_similarity)             -0.602    0.548    
scale(age)                                9.762   <2e-16 ***
scale(aoa)                               -0.601    0.548    
scale(MeanSaliencyDiff)                  -1.579    0.115    
scale(multimodal_similarity):scale(age)  -1.286    0.199    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(mltmd_)  0.029                            
scale(age)  -0.148  0.001                     
scale(aoa)  -0.073  0.131 -0.049              
scl(MnSlnD)  0.006  0.032  0.004  0.067       
scl(ml_):()  0.014 -0.055  0.117  0.018 -0.055

$ooo_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 56921.8

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.06679 -0.63558 -0.02463  0.70568  2.42243 

Random effects:
 Groups                        Name                  Variance  Std.Dev. Corr 
 subject_id                    (Intercept)           0.0172670 0.13140       
                               scale(ooo_similarity) 0.0002787 0.01670  -1.00
 original_target_label:img_key (Intercept)           0.0072348 0.08506       
 dataset_id                    (Intercept)           0.0114660 0.10708       
 Residual                                            0.9422191 0.97068       
Number of obs: 20311, groups:  
subject_id, 1356; original_target_label:img_key, 795; dataset_id, 24

Fixed effects:
                                   Estimate Std. Error         df t value
(Intercept)                      -1.988e-02  2.575e-02  2.176e+01  -0.772
scale(ooo_similarity)            -2.121e-02  1.154e-02  3.738e+02  -1.838
scale(age)                        1.362e-01  1.333e-02  2.847e+02  10.215
scale(aoa)                       -2.259e-03  9.117e-03  6.766e+02  -0.248
scale(MeanSaliencyDiff)          -1.446e-02  8.356e-03  4.128e+02  -1.730
scale(ooo_similarity):scale(age)  1.597e-02  8.090e-03  1.003e+03   1.974
                                 Pr(>|t|)    
(Intercept)                        0.4484    
scale(ooo_similarity)              0.0668 .  
scale(age)                         <2e-16 ***
scale(aoa)                         0.8044    
scale(MeanSaliencyDiff)            0.0843 .  
scale(ooo_similarity):scale(age)   0.0487 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(_smlrt)  0.100                            
scale(age)  -0.158 -0.028                     
scale(aoa)  -0.099 -0.112 -0.050              
scl(MnSlnD)  0.021  0.078  0.004  0.050       
scl(_sm):() -0.042 -0.196  0.183  0.038 -0.039
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')

Predicting target looking from window type (baseline vs. critical)

# Reshape to one row per trial x window: the two per-window looking columns are
# stacked into `target_looking`, and `window_type` keeps the column name with
# its "mean_target_looking_" prefix stripped.
baseline_data_window_type <- baseline_data |>
  pivot_longer(
    cols = c(
      mean_target_looking_critical_window,
      mean_target_looking_baseline_window
    ),
    names_to = "window_type",
    names_prefix = "mean_target_looking_",
    values_to = "target_looking"
  ) |>
  # Contrast-code the window: critical = +0.5, baseline = -0.5
  # (any other value of `window_type` maps to NA).
  mutate(
    trial_window_c = unname(
      c(critical_window = 0.5, baseline_window = -0.5)[window_type]
    )
  )
# Fit one model per similarity measure on the long-format window data. The
# `added_structure` string adds the window contrast and its interaction with
# the similarity measure being tested.
# NOTE(review): `pruned_model = TRUE` presumably selects the reduced
# random-effects structure (the summaries show random intercepts only) --
# confirm against fit_main() in helpers.R.
mods_baseline_window_type <- setNames(
  lapply(sims, function(similarity_name) {
    fit_main(
      sim = similarity_name,
      data = baseline_data_window_type,
      response = "target_looking",
      pruned_model = TRUE,
      added_structure = sprintf(
        "scale(trial_window_c) * scale(%s)",
        similarity_name
      )
    )
  }),
  sims
)
# Show any lme4 convergence/singularity messages (NULL means none were recorded).
lapply(mods_baseline_window_type, function(fit) fit@optinfo$conv$lme4$messages)
$image_similarity
NULL

$text_similarity
NULL

$multimodal_similarity
NULL

$ooo_similarity
NULL
# Print the full model summary for each similarity measure.
lapply(mods_baseline_window_type, summary)
$image_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 116735.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.74435 -0.70598  0.06353  0.75144  2.33986 

Random effects:
 Groups                        Name        Variance Std.Dev.
 subject_id                    (Intercept) 0.005936 0.07705 
 original_target_label:img_key (Intercept) 0.050519 0.22476 
 dataset_id                    (Intercept) 0.001850 0.04301 
 Residual                                  0.883599 0.94000 
Number of obs: 42614, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                                Estimate Std. Error         df
(Intercept)                                    3.082e-04  1.576e-02  1.805e+01
scale(image_similarity)                       -1.258e-02  1.361e-02  3.098e+02
scale(age)                                     1.022e-01  9.172e-03  4.183e+02
scale(aoa)                                     1.283e-03  9.481e-03  6.035e+02
scale(MeanSaliencyDiff)                        1.455e-02  1.029e-02  5.496e+02
scale(trial_window_c)                          2.216e-01  4.554e-03  4.142e+04
scale(image_similarity):scale(age)            -1.183e-03  7.385e-03  1.723e+03
scale(image_similarity):scale(trial_window_c) -1.011e-02  4.462e-03  4.142e+04
                                              t value Pr(>|t|)    
(Intercept)                                     0.020   0.9846    
scale(image_similarity)                        -0.924   0.3561    
scale(age)                                     11.145   <2e-16 ***
scale(aoa)                                      0.135   0.8924    
scale(MeanSaliencyDiff)                         1.414   0.1579    
scale(trial_window_c)                          48.650   <2e-16 ***
scale(image_similarity):scale(age)             -0.160   0.8727    
scale(image_similarity):scale(trial_window_c)  -2.265   0.0235 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD) sc(__) s(_):()
scl(mg_sml) -0.063                                           
scale(age)  -0.045 -0.076                                    
scale(aoa)  -0.133 -0.096 -0.103                             
scl(MnSlnD) -0.018  0.089  0.034  0.070                      
scl(trl_w_)  0.000  0.000  0.000  0.000  0.000               
scl(mg_):() -0.072  0.006 -0.318  0.049 -0.050  0.000        
scl(_):(__)  0.000  0.000  0.000  0.000  0.000  0.017  0.000 

$text_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 116693.4

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.7779 -0.7042  0.0637  0.7496  2.3743 

Random effects:
 Groups                        Name        Variance Std.Dev.
 subject_id                    (Intercept) 0.005934 0.07703 
 original_target_label:img_key (Intercept) 0.050674 0.22511 
 dataset_id                    (Intercept) 0.002097 0.04579 
 Residual                                  0.882670 0.93951 
Number of obs: 42614, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                               Estimate Std. Error         df
(Intercept)                                  -7.511e-04  1.656e-02  1.906e+01
scale(text_similarity)                       -5.940e-03  1.279e-02  1.631e+02
scale(age)                                    1.033e-01  9.334e-03  5.050e+02
scale(aoa)                                    6.957e-05  9.811e-03  6.877e+02
scale(MeanSaliencyDiff)                       1.541e-02  1.026e-02  5.556e+02
scale(trial_window_c)                         2.215e-01  4.551e-03  4.142e+04
scale(text_similarity):scale(age)             6.388e-03  7.790e-03  1.334e+03
scale(text_similarity):scale(trial_window_c) -3.084e-02  4.483e-03  4.142e+04
                                             t value Pr(>|t|)    
(Intercept)                                   -0.045    0.964    
scale(text_similarity)                        -0.465    0.643    
scale(age)                                    11.065  < 2e-16 ***
scale(aoa)                                     0.007    0.994    
scale(MeanSaliencyDiff)                        1.502    0.134    
scale(trial_window_c)                         48.665  < 2e-16 ***
scale(text_similarity):scale(age)              0.820    0.412    
scale(text_similarity):scale(trial_window_c)  -6.879 6.13e-12 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD) sc(__) s(_):()
scl(txt_sm)  0.206                                           
scale(age)  -0.017  0.076                                    
scale(aoa)  -0.065  0.251 -0.037                             
scl(MnSlnD) -0.005  0.027  0.038  0.089                      
scl(trl_w_)  0.000  0.000  0.000  0.000  0.000               
scl(tx_):()  0.104 -0.049  0.345  0.076  0.034  0.000        
scl(_):(__)  0.000  0.000  0.000  0.000  0.000  0.008  0.000 

$multimodal_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 116729.9

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.76169 -0.70593  0.06425  0.75307  2.35370 

Random effects:
 Groups                        Name        Variance Std.Dev.
 subject_id                    (Intercept) 0.005917 0.07692 
 original_target_label:img_key (Intercept) 0.050185 0.22402 
 dataset_id                    (Intercept) 0.001994 0.04466 
 Residual                                  0.883527 0.93996 
Number of obs: 42614, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                                     Estimate Std. Error
(Intercept)                                        -8.625e-04  1.591e-02
scale(multimodal_similarity)                       -1.314e-02  1.139e-02
scale(age)                                          1.025e-01  8.781e-03
scale(aoa)                                         -9.827e-04  9.536e-03
scale(MeanSaliencyDiff)                             1.498e-02  1.022e-02
scale(trial_window_c)                               2.216e-01  4.554e-03
scale(multimodal_similarity):scale(age)             8.754e-03  6.927e-03
scale(multimodal_similarity):scale(trial_window_c) -1.363e-02  4.491e-03
                                                           df t value Pr(>|t|)
(Intercept)                                         1.889e+01  -0.054  0.95735
scale(multimodal_similarity)                        5.106e+02  -1.153  0.24937
scale(age)                                          3.310e+02  11.669  < 2e-16
scale(aoa)                                          5.980e+02  -0.103  0.91795
scale(MeanSaliencyDiff)                             5.518e+02   1.466  0.14322
scale(trial_window_c)                               4.142e+04  48.655  < 2e-16
scale(multimodal_similarity):scale(age)             5.186e+03   1.264  0.20633
scale(multimodal_similarity):scale(trial_window_c)  4.142e+04  -3.035  0.00241
                                                      
(Intercept)                                           
scale(multimodal_similarity)                          
scale(age)                                         ***
scale(aoa)                                            
scale(MeanSaliencyDiff)                               
scale(trial_window_c)                              ***
scale(multimodal_similarity):scale(age)               
scale(multimodal_similarity):scale(trial_window_c) ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD) sc(__) s(_):()
scl(mltmd_)  0.032                                           
scale(age)  -0.075  0.012                                    
scale(aoa)  -0.129  0.152 -0.093                             
scl(MnSlnD) -0.016  0.011  0.024  0.082                      
scl(trl_w_)  0.000  0.000  0.000  0.000  0.000               
scl(ml_):()  0.022 -0.017  0.139  0.012 -0.016  0.000        
scl(_):(__)  0.000  0.000  0.000  0.000  0.000  0.013  0.000 

$ooo_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 111349.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.75772 -0.70507  0.06405  0.74717  2.27188 

Random effects:
 Groups                        Name        Variance Std.Dev.
 subject_id                    (Intercept) 0.006329 0.07955 
 original_target_label:img_key (Intercept) 0.044911 0.21192 
 dataset_id                    (Intercept) 0.001355 0.03681 
 Residual                                  0.886115 0.94134 
Number of obs: 40622, groups:  
subject_id, 1356; original_target_label:img_key, 795; dataset_id, 24

Fixed effects:
                                              Estimate Std. Error         df
(Intercept)                                  3.661e-04  1.493e-02  1.511e+01
scale(ooo_similarity)                       -1.146e-02  1.195e-02  1.276e+02
scale(age)                                   1.048e-01  8.779e-03  1.942e+02
scale(aoa)                                  -3.265e-03  9.576e-03  4.729e+02
scale(MeanSaliencyDiff)                      1.513e-02  1.027e-02  5.085e+02
scale(trial_window_c)                        2.224e-01  4.675e-03  3.951e+04
scale(ooo_similarity):scale(age)             1.070e-02  6.707e-03  1.322e+03
scale(ooo_similarity):scale(trial_window_c) -2.848e-02  4.661e-03  3.951e+04
                                            t value Pr(>|t|)    
(Intercept)                                   0.025    0.981    
scale(ooo_similarity)                        -0.959    0.340    
scale(age)                                   11.941   <2e-16 ***
scale(aoa)                                   -0.341    0.733    
scale(MeanSaliencyDiff)                       1.473    0.141    
scale(trial_window_c)                        47.571   <2e-16 ***
scale(ooo_similarity):scale(age)              1.596    0.111    
scale(ooo_similarity):scale(trial_window_c)  -6.111    1e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD) sc(__) s(_):()
scl(_smlrt)  0.114                                           
scale(age)  -0.074 -0.012                                    
scale(aoa)  -0.159 -0.098 -0.108                             
scl(MnSlnD) -0.005  0.043  0.023  0.075                      
scl(trl_w_)  0.000  0.000  0.000  0.000  0.000               
scl(_sm):() -0.028 -0.070  0.206  0.009 -0.026  0.000        
scl(_):(__)  0.000  0.000  0.000  0.000  0.000  0.044  0.000 

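These are all interesting: in every model the similarity × window-type interaction is negative and significant (strongest for text and adult behavioral similarity), i.e., the increase in target looking from the baseline to the critical window is smaller for more similar target-distractor pairs.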

# 1. Define models and scale constants
# Models are looked up by name rather than by list position, so each one stays
# paired with the right similarity measure even if the order of `sims` changes.
mm    <- mods_baseline_window_type[["multimodal_similarity"]]
ooo   <- mods_baseline_window_type[["ooo_similarity"]]
text  <- mods_baseline_window_type[["text_similarity"]]
image <- mods_baseline_window_type[["image_similarity"]]

# Pooled mean/SD of the response over the full long-format data. Used only as
# a fallback when a model does not carry its own scaling constants.
mu  <- mean(baseline_data_window_type$target_looking, na.rm = TRUE)
sig <- sd(baseline_data_window_type$target_looking, na.rm = TRUE)

# Return the centre and scale that were applied to a model's response.
# If the response column of the model frame carries the "scaled:center" /
# "scaled:scale" attributes written by scale(), those are used: they reflect
# the rows the model was actually fitted on (e.g. the ooo model uses fewer
# observations than the others). Otherwise the pooled `mu` / `sig` are used.
# NOTE(review): assumes fit_main() standardises the response with scale() --
# confirm in helpers.R.
response_scaling <- function(model, default_center = mu, default_scale = sig) {
  response <- model.frame(model)[[1]]
  center <- attr(response, "scaled:center")
  spread <- attr(response, "scaled:scale")
  if (is.null(center) || is.null(spread)) {
    return(c(center = default_center, scale = default_scale))
  }
  c(center = unname(center)[[1]], scale = unname(spread)[[1]])
}

# Predict target looking across one similarity measure for both window types,
# label the result, and add back-transformed (raw proportion) columns.
predict_window_effect <- function(model, sim, label) {
  preds <- ggpredict(model, terms = c(sim, "trial_window_c [-0.5, 0.5]"))
  preds$sim_type <- label
  scaling <- response_scaling(model)
  preds$predicted_raw <- preds$predicted * scaling[["scale"]] + scaling[["center"]]
  preds$conf.low_raw  <- preds$conf.low  * scaling[["scale"]] + scaling[["center"]]
  preds$conf.high_raw <- preds$conf.high * scaling[["scale"]] + scaling[["center"]]
  preds
}

# 2-3. Generate predictions and attach a source/type label to each dataframe
pred_mm <- predict_window_effect(
  mm, "multimodal_similarity", "Multimodal similarity"
)
pred_ooo <- predict_window_effect(
  ooo, "ooo_similarity", "Adult behavioral similarity"
)
pred_text <- predict_window_effect(
  text, "text_similarity", "Text similarity"
)
pred_image <- predict_window_effect(
  image, "image_similarity", "Image similarity"
)

# 4. Combine all predictions into one dataframe
combined_preds <- rbind(pred_mm, pred_ooo, pred_text, pred_image)

# Fix the facet order used by the plot.
combined_preds$sim_type <- factor(
  combined_preds$sim_type,
  levels = c(
    "Image similarity",
    "Text similarity",
    "Multimodal similarity",
    "Adult behavioral similarity"
  )
)

# 5. Un-scale / Back-transform
# `predicted_raw`, `conf.low_raw` and `conf.high_raw` were already computed per
# model in predict_window_effect(), so each model is un-scaled with its own
# centre and scale rather than one pooled pair of constants.

# 6. Create the Faceted Plot
# Legend labels are matched to the `group` levels by position; this relies on
# the -0.5 (baseline) level coming before the 0.5 (critical) level.
window_labels <- c("Baseline window", "Critical window")

baseline_interaction_plot <-
  ggplot(
    data = combined_preds,
    mapping = aes(x = x, y = predicted_raw, colour = group, fill = group)
  ) +
  # Confidence band around each predicted line
  geom_ribbon(
    mapping = aes(ymin = conf.low_raw, ymax = conf.high_raw),
    alpha = 0.15, colour = NA
  ) +
  geom_line(linewidth = 1.2) +
  # Dashed reference line at 0.5 proportion target looking
  geom_hline(yintercept = 0.5, linetype = "dashed", alpha = 0.7) +
  # One facet per similarity type; each facet gets its own x range
  facet_wrap(~ sim_type, scales = "free_x") +
  labs(
    title = "Effect of similarities by trial window type",
    x = "Target-distractor similarity",
    y = "Predicted proportion target looking",
    colour = "Trial window",
    fill = "Trial window"
  ) +
  scale_color_discrete(labels = window_labels) +
  scale_fill_discrete(labels = window_labels) +
  theme_classic() +
  theme(
    # Plain, bold facet headers
    strip.background = element_blank(),
    strip.text = element_text(face = "bold", size = 11),
    # Legend below the panels leaves more width for the facets
    legend.position = "bottom"
  )

# No width/height is given, so ggsave() takes the size of the current
# graphics device.
ggsave(
  filename = here("figures/baseline_interaction.png"),
  plot = baseline_interaction_plot
)

baseline window looking?

# Fit one model per similarity measure with baseline-window target looking as
# the response.
# NOTE(review): all four fits report a singular boundary fit (subject and
# dataset variances at or near zero) -- consider whether the random-effects
# structure should be simplified for this response.
mods_baseline_looking <- setNames(
  lapply(
    sims,
    fit_main,
    data = baseline_data,
    response = "mean_target_looking_baseline_window"
  ),
  sims
)
# Show any lme4 convergence/singularity messages for each fit.
lapply(mods_baseline_looking, function(fit) fit@optinfo$conv$lme4$messages)
$image_similarity
[1] "boundary (singular) fit: see help('isSingular')"

$text_similarity
[1] "boundary (singular) fit: see help('isSingular')"

$multimodal_similarity
[1] "boundary (singular) fit: see help('isSingular')"

$ooo_similarity
[1] "boundary (singular) fit: see help('isSingular')"
# Print the full model summary for each similarity measure.
lapply(mods_baseline_looking, summary)
$image_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 59654.2

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.01216 -0.82273 -0.00163  0.78419  2.08583 

Random effects:
 Groups                        Name                    Variance  Std.Dev. Corr
 subject_id                    (Intercept)             0.000e+00 0.000000     
                               scale(image_similarity) 3.595e-05 0.005996  NaN
 original_target_label:img_key (Intercept)             4.611e-02 0.214723     
 dataset_id                    (Intercept)             0.000e+00 0.000000     
 Residual                                              9.391e-01 0.969055     
Number of obs: 21307, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                     Estimate Std. Error         df t value
(Intercept)                         9.571e-03  1.273e-02  4.461e+02   0.752
scale(image_similarity)             1.300e-03  1.407e-02  4.081e+02   0.092
scale(age)                          1.514e-02  1.088e-02  2.486e+03   1.391
scale(aoa)                          3.830e-03  1.067e-02  7.540e+02   0.359
scale(MeanSaliencyDiff)             2.862e-02  1.150e-02  5.226e+02   2.488
scale(image_similarity):scale(age) -9.589e-05  9.231e-03  1.947e+03  -0.010
                                   Pr(>|t|)  
(Intercept)                          0.4524  
scale(image_similarity)              0.9264  
scale(age)                           0.1643  
scale(aoa)                           0.7197  
scale(MeanSaliencyDiff)              0.0132 *
scale(image_similarity):scale(age)   0.9917  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(mg_sml) -0.038                            
scale(age)   0.049 -0.108                     
scale(aoa)  -0.146 -0.110 -0.179              
scl(MnSlnD) -0.036  0.069  0.051  0.068       
scl(mg_):() -0.132 -0.055 -0.352  0.090 -0.054
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')


$text_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 59653.9

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.01323 -0.82437 -0.00213  0.78454  2.08579 

Random effects:
 Groups                        Name                   Variance  Std.Dev.  Corr
 subject_id                    (Intercept)            1.796e-09 4.238e-05     
                               scale(text_similarity) 1.227e-10 1.108e-05 1.00
 original_target_label:img_key (Intercept)            4.616e-02 2.149e-01     
 dataset_id                    (Intercept)            1.560e-10 1.249e-05     
 Residual                                             9.391e-01 9.691e-01     
Number of obs: 21307, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                    Estimate Std. Error         df t value
(Intercept)                        1.127e-02  1.299e-02  4.351e+02   0.868
scale(text_similarity)            -7.033e-04  1.248e-02  4.873e+02  -0.056
scale(age)                         1.799e-02  1.100e-02  1.748e+03   1.635
scale(aoa)                         4.795e-03  1.111e-02  7.195e+02   0.432
scale(MeanSaliencyDiff)            2.893e-02  1.148e-02  5.248e+02   2.520
scale(text_similarity):scale(age)  6.781e-03  9.551e-03  1.837e+03   0.710
                                  Pr(>|t|)  
(Intercept)                          0.386  
scale(text_similarity)               0.955  
scale(age)                           0.102  
scale(aoa)                           0.666  
scale(MeanSaliencyDiff)              0.012 *
scale(text_similarity):scale(age)    0.478  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(txt_sm)  0.148                            
scale(age)   0.090  0.156                     
scale(aoa)  -0.065  0.279 -0.061              
scl(MnSlnD) -0.028  0.010  0.060  0.086       
scl(tx_):()  0.189 -0.023  0.362  0.124  0.049
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')


$multimodal_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 59641.7

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.01340 -0.82210 -0.00101  0.78181  2.08068 

Random effects:
 Groups                        Name                         Variance  Std.Dev. 
 subject_id                    (Intercept)                  0.000e+00 0.000e+00
                               scale(multimodal_similarity) 3.304e-03 5.748e-02
 original_target_label:img_key (Intercept)                  4.541e-02 2.131e-01
 dataset_id                    (Intercept)                  1.279e-10 1.131e-05
 Residual                                                   9.356e-01 9.673e-01
 Corr
     
  NaN
     
     
     
Number of obs: 21307, groups:  
subject_id, 1356; original_target_label:img_key, 865; dataset_id, 24

Fixed effects:
                                          Estimate Std. Error         df
(Intercept)                              1.018e-02  1.256e-02  4.380e+02
scale(multimodal_similarity)            -1.255e-02  1.275e-02  4.578e+02
scale(age)                               1.672e-02  1.014e-02  1.843e+03
scale(aoa)                               2.798e-03  1.068e-02  7.353e+02
scale(MeanSaliencyDiff)                  2.807e-02  1.142e-02  5.250e+02
scale(multimodal_similarity):scale(age)  1.507e-02  9.201e-03  2.139e+03
                                        t value Pr(>|t|)  
(Intercept)                               0.811   0.4180  
scale(multimodal_similarity)             -0.984   0.3256  
scale(age)                                1.650   0.0992 .
scale(aoa)                                0.262   0.7935  
scale(MeanSaliencyDiff)                   2.458   0.0143 *
scale(multimodal_similarity):scale(age)   1.638   0.1016  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(mltmd_)  0.010                            
scale(age)   0.001  0.020                     
scale(aoa)  -0.137  0.153 -0.168              
scl(MnSlnD) -0.041  0.012  0.041  0.081       
scl(ml_):()  0.044 -0.063  0.097  0.012 -0.026
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')


$ooo_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 57024.8

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.98375 -0.82816 -0.00175  0.78846  2.05156 

Random effects:
 Groups                        Name                  Variance  Std.Dev.  Corr
 subject_id                    (Intercept)           0.000e+00 0.000e+00     
                               scale(ooo_similarity) 1.984e-12 1.409e-06  NaN
 original_target_label:img_key (Intercept)           4.111e-02 2.028e-01     
 dataset_id                    (Intercept)           0.000e+00 0.000e+00     
 Residual                                            9.483e-01 9.738e-01     
Number of obs: 20311, groups:  
subject_id, 1356; original_target_label:img_key, 795; dataset_id, 24

Fixed effects:
                                   Estimate Std. Error         df t value
(Intercept)                       1.082e-02  1.266e-02  3.972e+02   0.854
scale(ooo_similarity)            -2.591e-04  1.218e-02  4.366e+02  -0.021
scale(age)                        1.544e-02  1.023e-02  1.552e+03   1.510
scale(aoa)                        5.628e-04  1.079e-02  6.726e+02   0.052
scale(MeanSaliencyDiff)           2.802e-02  1.154e-02  4.776e+02   2.428
scale(ooo_similarity):scale(age)  3.408e-03  8.426e-03  1.816e+03   0.405
                                 Pr(>|t|)  
(Intercept)                        0.3934  
scale(ooo_similarity)              0.9830  
scale(age)                         0.1313  
scale(aoa)                         0.9584  
scale(MeanSaliencyDiff)            0.0155 *
scale(ooo_similarity):scale(age)   0.6859  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(_smlrt)  0.060                            
scale(age)   0.002  0.038                     
scale(aoa)  -0.144 -0.101 -0.178              
scl(MnSlnD) -0.028  0.015  0.040  0.079       
scl(_sm):()  0.018 -0.079  0.131  0.012 -0.019
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')

Under 3 years old (age ≤ 36 months)

LWL specific ages and AoA?

# Refit the main model for each similarity measure on the younger subsample
# only (age <= 36, i.e. 36 itself is included).
mods_younger <- setNames(
  lapply(
    sims,
    fit_main,
    data = model_data |> filter(age <= 36)
  ),
  sims
)
# Show any lme4 convergence/singularity messages for each fit.
lapply(mods_younger, function(fit) fit@optinfo$conv$lme4$messages)
$image_similarity
NULL

$text_similarity
[1] "boundary (singular) fit: see help('isSingular')"

$multimodal_similarity
NULL

$ooo_similarity
NULL
# Print the full model summary for each similarity measure.
lapply(mods_younger, summary)
$image_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 50134.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2328 -0.6425  0.1005  0.7465  2.3764 

Random effects:
 Groups                        Name                    Variance Std.Dev. Corr 
 subject_id                    (Intercept)             0.042827 0.20695       
                               scale(image_similarity) 0.001451 0.03809  -0.32
 original_target_label:img_key (Intercept)             0.051350 0.22660       
 dataset_id                    (Intercept)             0.019115 0.13826       
 Residual                                              0.883328 0.93986       
Number of obs: 18093, groups:  
subject_id, 827; original_target_label:img_key, 789; dataset_id, 19

Fixed effects:
                                     Estimate Std. Error         df t value
(Intercept)                         -0.058929   0.037522  17.938544  -1.571
scale(image_similarity)             -0.031476   0.017869 371.136722  -1.762
scale(age)                           0.107157   0.014508 429.772986   7.386
scale(aoa)                          -0.008869   0.012023 710.924934  -0.738
scale(MeanSaliencyDiff)              0.011070   0.013234 388.928509   0.836
scale(image_similarity):scale(age)  -0.007091   0.011193 768.650303  -0.634
                                   Pr(>|t|)    
(Intercept)                           0.134    
scale(image_similarity)               0.079 .  
scale(age)                         7.92e-13 ***
scale(aoa)                            0.461    
scale(MeanSaliencyDiff)               0.403    
scale(image_similarity):scale(age)    0.527    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(mg_sml) -0.080                            
scale(age)  -0.121 -0.032                     
scale(aoa)  -0.049 -0.089 -0.005              
scl(MnSlnD) -0.003  0.110  0.019  0.076       
scl(mg_):() -0.066  0.027 -0.071  0.022 -0.025

$text_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 50129.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2223 -0.6410  0.1006  0.7459  2.3465 

Random effects:
 Groups                        Name                   Variance  Std.Dev. Corr 
 subject_id                    (Intercept)            0.0420219 0.20499       
                               scale(text_similarity) 0.0008227 0.02868  -1.00
 original_target_label:img_key (Intercept)            0.0516830 0.22734       
 dataset_id                    (Intercept)            0.0201505 0.14195       
 Residual                                             0.8830653 0.93972       
Number of obs: 18093, groups:  
subject_id, 827; original_target_label:img_key, 789; dataset_id, 19

Fixed effects:
                                    Estimate Std. Error         df t value
(Intercept)                       -6.285e-02  3.864e-02  1.814e+01  -1.626
scale(text_similarity)             2.630e-04  1.790e-02  4.936e+02   0.015
scale(age)                         1.160e-01  1.514e-02  4.473e+02   7.659
scale(aoa)                        -7.581e-03  1.225e-02  6.889e+02  -0.619
scale(MeanSaliencyDiff)            1.338e-02  1.318e-02  3.941e+02   1.016
scale(text_similarity):scale(age)  1.977e-02  1.015e-02  2.727e+03   1.947
                                  Pr(>|t|)    
(Intercept)                         0.1211    
scale(text_similarity)              0.9883    
scale(age)                        1.17e-13 ***
scale(aoa)                          0.5362    
scale(MeanSaliencyDiff)             0.3105    
scale(text_similarity):scale(age)   0.0516 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(txt_sm)  0.147                            
scale(age)  -0.115 -0.012                     
scale(aoa)  -0.021  0.153  0.016              
scl(MnSlnD)  0.007  0.019  0.021  0.088       
scl(tx_):()  0.016 -0.171  0.234  0.066  0.003
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')


$multimodal_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 50133.9

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2178 -0.6421  0.0997  0.7466  2.4044 

Random effects:
 Groups                        Name                         Variance Std.Dev.
 subject_id                    (Intercept)                  0.04274  0.20672 
                               scale(multimodal_similarity) 0.00376  0.06132 
 original_target_label:img_key (Intercept)                  0.05105  0.22594 
 dataset_id                    (Intercept)                  0.01995  0.14123 
 Residual                                                   0.88071  0.93846 
 Corr 
      
 -0.13
      
      
      
Number of obs: 18093, groups:  
subject_id, 827; original_target_label:img_key, 789; dataset_id, 19

Fixed effects:
                                          Estimate Std. Error         df
(Intercept)                             -6.735e-02  3.794e-02  1.794e+01
scale(multimodal_similarity)            -1.888e-02  1.439e-02  3.775e+02
scale(age)                               1.058e-01  1.464e-02  3.895e+02
scale(aoa)                              -1.261e-02  1.220e-02  7.159e+02
scale(MeanSaliencyDiff)                  1.249e-02  1.313e-02  3.942e+02
scale(multimodal_similarity):scale(age)  7.162e-04  9.158e-03  1.540e+03
                                        t value Pr(>|t|)    
(Intercept)                              -1.775   0.0928 .  
scale(multimodal_similarity)             -1.312   0.1903    
scale(age)                                7.225 2.66e-12 ***
scale(aoa)                               -1.034   0.3016    
scale(MeanSaliencyDiff)                   0.951   0.3421    
scale(multimodal_similarity):scale(age)   0.078   0.9377    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(mltmd_)  0.027                            
scale(age)  -0.125  0.011                     
scale(aoa)  -0.046  0.151  0.005              
scl(MnSlnD)  0.005  0.038  0.022  0.093       
scl(ml_):()  0.019 -0.084  0.106  0.070  0.007

$ooo_similarity
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: f
   Data: data

REML criterion at convergence: 47607.9

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2332 -0.6411  0.1005  0.7438  2.3779 

Random effects:
 Groups                        Name                  Variance Std.Dev. Corr 
 subject_id                    (Intercept)           0.043377 0.20827       
                               scale(ooo_similarity) 0.007083 0.08416  -0.26
 original_target_label:img_key (Intercept)           0.048158 0.21945       
 dataset_id                    (Intercept)           0.020391 0.14280       
 Residual                                            0.873112 0.93440       
Number of obs: 17227, groups:  
subject_id, 827; original_target_label:img_key, 724; dataset_id, 19

Fixed effects:
                                  Estimate Std. Error        df t value
(Intercept)                       -0.07453    0.03896  17.48204  -1.913
scale(ooo_similarity)             -0.03862    0.01829 311.19597  -2.111
scale(age)                         0.11482    0.01559 351.56543   7.367
scale(aoa)                        -0.01224    0.01256 635.24624  -0.975
scale(MeanSaliencyDiff)            0.01439    0.01337 356.41570   1.076
scale(ooo_similarity):scale(age)   0.01566    0.01026 973.00617   1.526
                                 Pr(>|t|)    
(Intercept)                        0.0723 .  
scale(ooo_similarity)              0.0356 *  
scale(age)                       1.25e-12 ***
scale(aoa)                         0.3301    
scale(MeanSaliencyDiff)            0.2827    
scale(ooo_similarity):scale(age)   0.1272    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(g) scal() s(MSD)
scl(_smlrt)  0.138                            
scale(age)  -0.138 -0.055                     
scale(aoa)  -0.075 -0.090  0.004              
scl(MnSlnD)  0.019  0.079  0.017  0.076       
scl(_sm):() -0.046 -0.214  0.274  0.047 -0.025

This makes me think of the cascading age prediction: image similarity for younger kids, multimodal similarity for older kids, and text-only similarity for even older kids?

Adult behavioral similarity is significant here. AoA effects only appear between 24 and 36 months.

Conference plots

todo: make sure models used are not singular

predict main line – with datasets

unscaled

# Reshape `vanilla` to long format: the columns named in `sims` are stacked so
# that the measure name lands in `sim_type` and its value in `similarity`.
# `sim_type` is then turned into a factor whose level order follows `sims`,
# which fixes the facet (panel) order in the plots.
plot_data <- mutate(
  pivot_longer(
    vanilla,
    cols = all_of(sims),
    names_to = "sim_type",
    values_to = "similarity"
  ),
  sim_type = factor(sim_type, levels = sims)
)

# ---- lmer prediction lines, one stacked data frame ---------------------------
# For every similarity measure in `sims`: take the fitted model from `mods`,
# predict the fixed-effects-only response (re.form = NA) over 150 evenly spaced
# similarity values spanning the range the model was fit on, with age, aoa and
# MeanSaliencyDiff held at their means in `model_data`, and map the prediction
# and its +/- 1.96 * SE band back to the response's original scale.
pred <- sims |>
  lapply(\(sim_name) {
    fit <- mods[[sim_name]]
    frame <- model.frame(fit) # only the rows lmer kept

    # The model frame stores the scale()d similarity column; the attributes
    # scale() attaches let us recover the raw values.
    z_sim <- frame[[grep(paste0("scale\\(", sim_name), names(frame))]]
    raw_sim <- as.vector(z_sim) * attr(z_sim, "scaled:scale") +
      attr(z_sim, "scaled:center")
    sim_grid <- seq(min(raw_sim), max(raw_sim), length.out = 150)

    # Prediction grid: the similarity column must carry the measure's own name
    # so the model formula can find it.
    new_rows <- data.frame(
      sim_grid,
      age = mean(model_data$age, na.rm = TRUE),
      aoa = mean(model_data$aoa, na.rm = TRUE),
      MeanSaliencyDiff = mean(model_data$MeanSaliencyDiff, na.rm = TRUE)
    )
    names(new_rows)[1] <- sim_name

    pr_out <- predict(fit, newdata = new_rows, re.form = NA, se.fit = TRUE)

    # First model-frame column is the response; read its scaling attributes to
    # undo the z-scoring of the predictions.
    resp <- frame[[1]]
    resp_center <- attr(resp, "scaled:center")
    resp_scale <- attr(resp, "scaled:scale")
    unscale <- \(z) z * resp_scale + resp_center

    data.frame(
      sim_type = sim_name,
      similarity = sim_grid,
      y = unscale(pr_out$fit),
      lower = unscale(pr_out$fit - 1.96 * pr_out$se.fit),
      upper = unscale(pr_out$fit + 1.96 * pr_out$se.fit)
    )
  }) |>
  bind_rows() |>
  mutate(sim_type = factor(sim_type, levels = sims))

library(colorspace)

# Scatter of `mean_value` against raw similarity, coloured by dataset and
# faceted by similarity measure. Each dataset gets its own glm trend line; the
# black line and grey band are taken from `pred` (y, lower, upper).
fig <- ggplot(
  data = plot_data,
  mapping = aes(
    x = similarity, y = mean_value,
    colour = dataset_name, fill = dataset_name
  )
) +
  # dashed horizontal reference line at 0.5
  geom_hline(linetype = "dashed", yintercept = 0.5) +
  geom_point(alpha = 0.1, size = 3) +
  stat_smooth(geom = "line", method = "glm", linewidth = 1, alpha = 0.4) +
  # lmer population line + CI (drawn from `pred`, ignoring the plot-level aes)
  geom_ribbon(
    data = pred,
    mapping = aes(x = similarity, ymin = lower, ymax = upper),
    inherit.aes = FALSE,
    alpha = 0.15, fill = "black"
  ) +
  geom_line(
    data = pred,
    mapping = aes(x = similarity, y = y),
    inherit.aes = FALSE,
    linewidth = 1.2, alpha = 0.7, colour = "black"
  ) +
  # one panel per similarity measure; strips on top read as x-axis titles
  facet_wrap(
    vars(sim_type),
    nrow = 2,
    scales = "free_x",
    strip.position = "top",
    labeller = labeller(sim_type = facet_labs)
  ) +
  coord_cartesian(ylim = c(0, 1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.2)) +
  scale_color_discrete_qualitative(palette = "Set 2") +
  labs(
    x = "Target–distractor similarity",
    y = "Proportion target looking in critical window",
    colour = "Dataset Name",
    fill = "Dataset Name"
  ) +
  theme_classic(base_size = 16) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(size = 20),
    panel.spacing = unit(1, "lines"),
    plot.margin = margin(t = 5.5, r = 30, b = 5.5, l = 5.5, unit = "pt")
  )

# Write the figure to figures/dataset_similarities.png, then display it.
ggsave(
  filename = here("figures", "dataset_similarities.png"),
  plot = fig,
  width = 16, height = 13, bg = "white"
)
fig

scaled

library(ggeffects)
library(purrr)
library(dplyr)

# Mean and SD of the looking outcome column in `model_data_scaled`; used below
# only to map model predictions back onto the outcome's original scale.
# NOTE(review): this assumes the models in `mods_scaled` were fit on an outcome
# z-scored with exactly these statistics; confirm against the fitting helper.
y_mean <- model_data_scaled |>
  pull(mean_target_looking_critical_window) |>
  mean(na.rm = TRUE)
y_sd <- model_data_scaled |>
  pull(mean_target_looking_critical_window) |>
  sd(na.rm = TRUE)

# For each similarity measure, get ggpredict() predictions over all observed
# values of its `scaled_<measure>` term and stack them into one data frame.
# The x-axis stays on the z-score scale; y and its interval are un-scaled.
pred_scaled <- sims |>
  map(\(sim_name) {
    term_spec <- sprintf("scaled_%s [all]", sim_name)
    curve <- as.data.frame(ggpredict(mods_scaled[[sim_name]], terms = term_spec))

    data.frame(
      sim_type = sim_name,
      similarity = curve$x,
      y = (curve$predicted * y_sd) + y_mean,
      lower = (curve$conf.low * y_sd) + y_mean,
      upper = (curve$conf.high * y_sd) + y_mean
    )
  }) |>
  bind_rows() |>
  mutate(sim_type = factor(sim_type, levels = sims))

scaled with dataset?

# Summaries of the scaled model data, once by dataset only and once by dataset
# and age bucket (the summarising itself is done by the project helper).
clip_data_summarized_scaled <- model_data_scaled |>
  summarize_similarity_data_collapsed_scaled(extra_fields = "dataset_name")
clip_data_summarized_scaled_bucket <- model_data_scaled |>
  summarize_similarity_data_collapsed_scaled(
    extra_fields = c("dataset_name", "age_bucket")
  )

# Long format for plotting: strip the "scaled_" text from column names so the
# similarity columns match the names in `sims`, stack them into
# `sim_type` / `similarity`, and order `sim_type` levels as in `sims`.
plot_data_scaled <- mutate(
  pivot_longer(
    rename_with(clip_data_summarized_scaled, \(nm) gsub("scaled_", "", nm)),
    cols = all_of(sims),
    names_to = "sim_type",
    values_to = "similarity"
  ),
  sim_type = factor(sim_type, levels = sims)
)


# Scatter of `mean_value` against z-scored similarity, coloured by dataset and
# faceted by similarity measure. Each dataset gets its own glm trend line; the
# black line and grey band come from `pred_scaled` (y, lower, upper).
fig <- ggplot(plot_data_scaled, aes(similarity, mean_value,
                             colour = dataset_name, fill = dataset_name)) +
  # dashed horizontal reference line at 0.5
  geom_hline(yintercept = 0.5, linetype = "dashed") +
  geom_point(size = 3, alpha = 0.1) +
  stat_smooth(method = "glm", geom = "line", alpha = 0.5, linewidth = 1) +

  # lmer population line + CI (drawn from `pred_scaled`, ignoring plot-level aes)
  geom_ribbon(data = pred_scaled, inherit.aes = FALSE,
              aes(x = similarity, ymin = lower, ymax = upper),
              fill = "black", alpha = 0.15) +
  geom_line(data = pred_scaled, inherit.aes = FALSE,
            aes(x = similarity, y = y),
            colour = "black", alpha = 0.7, linewidth = 1.2) +

  # one panel per similarity measure, each with its own x range
  facet_wrap(~ sim_type, nrow = 2, scales = "free_x",
             labeller = labeller(sim_type = facet_labs),
             strip.position = "top") +
  scale_y_continuous(breaks = seq(0, 1, 0.2)) +
  coord_cartesian(ylim = c(0, 1)) +
  scale_color_discrete_qualitative(palette = "Set 2") +
  labs(x = "Target–distractor similarity (z-scored)",
       y = "Proportion target looking in critical window",
       colour = "Dataset Name", fill = "Dataset Name") +
  theme_classic(base_size = 16) +
  theme(legend.position = "bottom",
        strip.text = element_text(size = 20),
        panel.spacing = unit(1, "lines"),
        plot.margin = margin(t = 5.5, r = 30, b = 5.5, l = 5.5, unit = "pt"))

# Raster copy for slides/drafts.
ggsave(here("figures", "dataset_similarities_zscored.png"),
       fig, width = 16, height = 13, bg = "white")
# Vector copy. This was previously written with device = "pdf" to a path ending
# in ".svg", which produced a PDF file carrying an SVG extension. The extension
# now matches the device that is actually used.
ggsave(here("figures", "dataset_similarities_zscored.pdf"),
       fig, width = 16, height = 13, device = "pdf")

age

todo: ensure models used are not singular.

# Keep only the rows of the scaled model data that have an age bucket.
model_data_age <- filter(model_data_scaled, !is.na(age_bucket))

### 2. Fit Individual Models using Scaled Data
# Fit one model per (similarity measure, age bucket) combination with
# fit_main_scaled(), each on the rows of that bucket only. The result is a
# named list whose names have the form "<sim>__<age_bucket>".
mods_age <- local({
  grid <- expand_grid(
    sim = sims,
    age_bucket = unique(model_data_age$age_bucket)
  )

  fits <- map2(grid$sim, grid$age_bucket, \(sim_name, this_bucket) {
    fit_main_scaled(
      sim = sim_name,
      data = filter(model_data_age, age_bucket == this_bucket)
    )
  })

  setNames(fits, paste(grid$sim, grid$age_bucket, sep = "__"))
})

# Mean and SD of the looking outcome across all of `model_data_age`; used to
# map predictions back onto the outcome's original scale. This requires
# `model_data_age` to hold the unscaled outcome in this column.
# NOTE(review): these are pooled over all age buckets, while each model is fit
# on a single bucket; confirm that fit_main_scaled() does not re-standardise
# the outcome within the data it is given.
y_mean <- model_data_age |>
  pull(mean_target_looking_critical_window) |>
  mean(na.rm = TRUE)
y_sd <- model_data_age |>
  pull(mean_target_looking_critical_window) |>
  sd(na.rm = TRUE)


# 2. Prediction grid: for every model in `mods_age`, predict over all observed
# values of its similarity term (x stays on the z-score scale) and un-scale the
# predicted outcome and its interval. Model names are "<sim>__<age_bucket>".
pred <- names(mods_age) |>
  map(\(model_name) {
    fit <- mods_age[[model_name]]
    name_parts <- strsplit(model_name, "__")[[1]] # c(<sim>, <age_bucket>)

    # Locate the similarity variable this model uses: prefer a
    # "scaled_..._similarity" name, otherwise any name ending in "_similarity".
    model_vars <- all.vars(formula(fit))
    sim_col <- grep("scaled_.*_similarity$", model_vars, value = TRUE)
    if (length(sim_col) == 0) {
      sim_col <- grep("_similarity$", model_vars, value = TRUE)
    }

    curve <- as.data.frame(ggpredict(fit, terms = paste0(sim_col, " [all]")))

    data.frame(
      sim_type = name_parts[1],
      age_bucket = name_parts[2],
      similarity = curve$x,
      y = (curve$predicted * y_sd) + y_mean,
      lower = (curve$conf.low * y_sd) + y_mean,
      upper = (curve$conf.high * y_sd) + y_mean
    )
  }) |>
  bind_rows() |>
  mutate(sim_type = factor(sim_type, levels = sims))
# Long-format, per-age-bucket plotting data: strip the "scaled_" text from the
# column names so the similarity columns match `sims`, stack them under the
# uniform column name `similarity` (measure name in `sim_type`), and order the
# `sim_type` levels as in `sims`.

plot_data_scaled_bucket <- mutate(
  pivot_longer(
    rename_with(
      clip_data_summarized_scaled_bucket,
      \(nm) gsub("scaled_", "", nm)
    ),
    cols = all_of(sims),
    names_to = "sim_type",
    values_to = "similarity"
  ),
  sim_type = factor(sim_type, levels = sims)
)

# Scatter of `mean_value` against z-scored similarity, coloured by age bucket
# and faceted by similarity measure, overlaid with the per-bucket prediction
# lines and bands stored in `pred`. Rows without an age bucket are dropped
# from both the points and the prediction layers.
age_scaled_plot <- ggplot(
  data = filter(plot_data_scaled_bucket, !is.na(age_bucket)),
  mapping = aes(
    x = similarity, y = mean_value,
    colour = age_bucket, fill = age_bucket
  )
) +
  # dashed horizontal reference line at 0.5
  geom_hline(linetype = "dashed", yintercept = 0.5) +
  geom_point(size = 2, alpha = 0.12) +

  # lmer population ribbon + lines split/colored by age_bucket
  geom_ribbon(
    data = filter(pred, !is.na(age_bucket)),
    mapping = aes(
      x = similarity, ymin = lower, ymax = upper,
      fill = age_bucket
    ),
    inherit.aes = FALSE,
    colour = NA, alpha = 0.2
  ) +
  geom_line(
    data = filter(pred, !is.na(age_bucket)),
    mapping = aes(x = similarity, y = y, colour = age_bucket),
    inherit.aes = FALSE,
    linewidth = 1.2
  ) +

  # one panel per similarity measure, each with its own x range
  facet_wrap(
    vars(sim_type),
    nrow = 2,
    scales = "free_x",
    strip.position = "top",
    labeller = labeller(sim_type = facet_labs)
  ) +
  coord_cartesian(ylim = c(0, 1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.2)) +
  # same palette settings for outline and fill so the legends line up
  scale_color_viridis_d(option = "mako", direction = -1, begin = 0.15, end = 0.7) +
  scale_fill_viridis_d(option = "mako", direction = -1, begin = 0.15, end = 0.7) +
  labs(
    x = "Target–distractor similarity (z-scored)",
    y = "Proportion target looking in critical window",
    colour = "Age bucket",
    fill = "Age bucket"
  ) +
  theme_classic(base_size = 16) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(size = 20),
    panel.spacing = unit(1, "lines"),
    plot.margin = margin(t = 5.5, r = 30, b = 5.5, l = 5.5, unit = "pt")
  )

# Write the figure to figures/age_similarities_zscored.png.
ggsave(
  filename = here("figures", "age_similarities_zscored.png"),
  plot = age_scaled_plot,
  width = 15, height = 13, bg = "white"
)