Exploratory analyses post CCN 2025

Published

August 27, 2025

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
here() starts at /Users/visuallearninglab/Documents/visvocab

Note: The package "relayer" is highly experimental. Use at your own risk.

Loading required package: viridisLite

Loading required package: lme4

Loading required package: Matrix


Attaching package: 'Matrix'


The following objects are masked from 'package:tidyr':

    expand, pack, unpack



Attaching package: 'lmerTest'


The following object is masked from 'package:lme4':

    lmer


The following object is masked from 'package:stats':

    step



Attaching package: 'cowplot'


The following object is masked from 'package:lubridate':

    stamp



Attaching package: 'scales'


The following object is masked from 'package:viridis':

    viridis_pal


The following object is masked from 'package:purrr':

    discard


The following object is masked from 'package:readr':

    col_factor



Attaching package: 'rlang'


The following objects are masked from 'package:purrr':

    %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
    flatten_raw, invoke, splice


Registering fonts with R
PROCESSED_DATA_PATH = here("data","main","processed_data")
trial_summary_data <- read.csv(here(PROCESSED_DATA_PATH, "level-trials_data.csv"))
trial_metadata <- read.csv(here("data", "metadata", "level-trialtype_data.csv"))
cvcl_similarities <- read.csv(here("data", "embeddings", "similarities-cvcl_data.csv"))
openclip_similarities <- read.csv(here("data", "embeddings", "similarities-openclip_data.csv"))
saycamvit_similarities <- read.csv(here("data", "embeddings", "similarities-saycamvit_data.csv"))
imagenetvit_similarities <- read.csv(here("data", "embeddings", "similarities-imagenetvit_data.csv"))
target_looking_trial_level <- read.csv(here("data", PROJECT_VERSION, "processed_data", "level-trialType_added-accuracy_data.csv"))
looking_data_summarized <- trial_summary_data |>
  filter(trial_exclusion == 0 & exclude_participant == 0 & exclude_participant_insufficient_data == 0) |>
  left_join(trial_metadata) |>
  arrange(AoA_Est_target)
Joining with `by = join_by(Trials.trialID, Trials.targetImage,
Trials.distractorImage, Trials.imagePair)`
clip_data_summarized <- summarize_similarity_data(looking_data_summarized)

openclip checkpoint number fix

## comparisons for openclip
total_checkpoints <- 256
num_selections <- 40

# Step 1: Log-spaced values in base 2
log_scale_checkpoints <- 2 ^ seq(log2(1), log2(total_checkpoints), length.out = num_selections)

# Step 2: Round to nearest integers
log_scale_checkpoints <- round(log_scale_checkpoints)

# Step 3: Remove duplicates and sort
unique_checkpoints <- sort(unique(log_scale_checkpoints))

helpers

fit_image_model <- function(summary_data) {
  lmer(
    scale(corrected_target_looking) ~ 
      scale(image_similarity) * scale(age_in_months) + 
      scale(AoA_Est_target) + 
      scale(MeanSaliencyDiff) +
      (1 | SubjectInfo.subjID) +
      (1 | Trials.targetImage),
    data = summary_data |> 
      mutate(age_in_months = SubjectInfo.testAge / 30)
  )
}

fit_text_model <- function(summary_data) {
  lmer(
    scale(corrected_target_looking) ~ 
      scale(text_similarity) * scale(age_in_months) + 
      scale(AoA_Est_target) + 
      scale(MeanSaliencyDiff) +
      (1 | SubjectInfo.subjID) +
      (1 | Trials.targetImage),
    data = summary_data |> 
      mutate(age_in_months = SubjectInfo.testAge / 30)
  )
}

multiple_similarity_effects_plot <- function(data, x_var, y_var = "mean_value", group_var, input_title, prod = FALSE) {
  sim_type <- strsplit(x_var, "_")[[1]][1]
  
  # Conditionally filter data if prod is TRUE
  if (prod) {
    label_data <- data %>% 
      filter(Trials.targetImage == "acorn" | Trials.distractorImage == "acorn") %>%
      mutate(label = paste("Target:", Trials.targetImage, "\nDistractor:", Trials.distractorImage))
  } else {
    label_data <- data %>%
      mutate(label = paste("Target:", Trials.targetImage, "\nDistractor:", Trials.distractorImage))
  }

  # Build the plot
  p <- ggplot(data, aes(x = .data[[x_var]], y = .data[[y_var]], color = .data[[group_var]])) +
    geom_hline(yintercept = 0, linetype = "dashed", size=1.5) +
    geom_point(size = 7, alpha = 0.8) +
    geom_smooth(alpha = 0.3, size = 0, method = "lm", show.legend = FALSE) +
    stat_smooth(geom = "line", alpha = 0.8, size = 1.5, method = "lm", show.legend = FALSE) +
    coord_cartesian(ylim = c(-0.12, 0.22)) +
    geom_label_repel(
        color = "black",          # force label text to be black
      fill = "white",           # optional: white label background
    segment.color = "black",  # force label line to be black
      data = label_data,
      aes(label = label),
      segment.alpha = 0.7,
      nudge_y = ifelse(label_data$Trials.targetImage == "bulldozer", -0.02, 0.02),
      force = 10,
      force_pull = 0.1,
      size = 5,
      segment.size = 1.2,
      point.padding = unit(1, "lines"),
      min.segment.length = 0,
      box.padding = unit(0.5, "lines"),
      max.overlaps = Inf,
      label.padding = unit(0.25, "lines"),
      label.r = unit(0.5, "lines"),
      show.legend = FALSE
    ) +
    ylab("Baseline-corrected\nproportion target looking") +
    xlab("Target-distractor embedding similarity") +
    scale_y_continuous(breaks = seq(-0.1, 0.2, by = 0.1)) +
    scale_color_manual(values = c(
      "image_similarity" = "#215D89",  # blue
      "text_similarity" = "#B8481C"    # orange
    )) +
    guides(color = "none") +  # Remove legend (optional)
    theme(
      text = element_text(size = 16, face = "bold"),
      axis.title.x = element_text(
        face = "bold", 
        size = 29,
        margin = margin(t = 15, r = 0, b = 0, l = 0)
      ),
      legend.key = element_blank(),
      axis.title.y = element_text(
        face = "bold", 
        size = 29,
        margin = margin(t = 0, r = 10, b = 0, l = 0)
      ),
      axis.text = element_text(size = 24, face = "bold"),
      legend.title = element_blank(),
      legend.text = element_text(size = 22, face = "bold"),
      legend.position = "bottom",
      strip.text = element_text(size = 28, face = "bold"),
      strip.background = element_rect(fill = "gray90", color = NA),
      strip.text.x = element_text(margin = margin(t = 8, b = 8)),
      panel.spacing = unit(0.5, "cm")
    ) +
    facet_wrap(
      facets = ~ .data[[group_var]],
      dir = "v",
      strip.position = "top",
      labeller = as_labeller(c(
        "image_similarity" = "Image Similarity",
        "text_similarity" = "Text Similarity"
      )),
      ncol = 1,
      scales = "free"
    )

  # Conditionally add custom x-axis scales
  if (prod) {
    p <- p + facetted_pos_scales(
      x = list(
        image_similarity = scale_x_continuous(
          breaks = seq(0.5, 0.9, by = 0.1),
          limits = c(0.43, 0.85)
        ),
        text_similarity = scale_x_continuous(
          breaks = seq(0.7, 0.9, by = 0.05),
          limits = c(0.7, 0.91)
        )
      )
    )
  }

  return(p)
}

Saliency

Cherry-picking the data to an extent but we do see order effects? No interaction between order and saliency though

order_ranked_trials <- looking_data_summarized |>
  group_by(SubjectInfo.subjID, Trials.targetImage) |>
  arrange(Trials.ordinal, .by_group = TRUE) |>
  mutate(
    slice_num = row_number(),
    order = case_when(
      slice_num == 1 ~ -0.5,
      slice_num == 2 ~ 0.5,
      TRUE ~ NA
    )
  )
main_image_effect_ranked <- lmer(scale(mean_target_looking_baseline_window) ~ (scale(order)*scale(MeanSaliencyDiff))
                    + (1 | SubjectInfo.subjID) 
                    + (1|Trials.targetImage), 
                    data = order_ranked_trials |> mutate(age_in_months = SubjectInfo.testAge / 30 ))
boundary (singular) fit: see help('isSingular')
# |> filter(Trials.trialType %in% c("easy", "hard")
summary(main_image_effect_ranked)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: scale(mean_target_looking_baseline_window) ~ (scale(order) *  
    scale(MeanSaliencyDiff)) + (1 | SubjectInfo.subjID) + (1 |  
    Trials.targetImage)
   Data: mutate(order_ranked_trials, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 7006.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.8938 -0.7271 -0.0227  0.7659  1.9311 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.00000  0.0000  
 Trials.targetImage (Intercept) 0.01782  0.1335  
 Residual                       0.97509  0.9875  
Number of obs: 2476, groups:  SubjectInfo.subjID, 91; Trials.targetImage, 24

Fixed effects:
                                       Estimate Std. Error         df t value
(Intercept)                             0.01664    0.03437   20.14899   0.484
scale(order)                           -0.05932    0.02223 1059.09876  -2.669
scale(MeanSaliencyDiff)                 0.04764    0.03004   44.02760   1.586
scale(order):scale(MeanSaliencyDiff)   -0.01659    0.02168 1189.62912  -0.765
                                     Pr(>|t|)   
(Intercept)                           0.63348   
scale(order)                          0.00773 **
scale(MeanSaliencyDiff)               0.11995   
scale(order):scale(MeanSaliencyDiff)  0.44438   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(r) s(MSD)
scale(ordr)  0.084              
scl(MnSlnD)  0.024 -0.060       
scl():(MSD) -0.062 -0.127 -0.021
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
looking_data_first_appearance <- looking_data_summarized |>
  group_by(SubjectInfo.subjID, Trials.targetImage) |>
  arrange(Trials.ordinal, .by_group = TRUE) |>
  slice(1) |>
  ungroup()

baseline_looking_first_appearance_model <- lmer(scale(mean_target_looking_baseline_window) ~ scale(MeanSaliencyDiff) 
                    + (1 | SubjectInfo.subjID)
                    + (1 | Trials.targetImage)
                    + (1 | Trials.imagePair), 
                    data = looking_data_first_appearance)
boundary (singular) fit: see help('isSingular')
# |> filter(Trials.trialType %in% c("easy", "hard")
summary(baseline_looking_first_appearance_model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(mean_target_looking_baseline_window) ~ scale(MeanSaliencyDiff) +  
    (1 | SubjectInfo.subjID) + (1 | Trials.targetImage) + (1 |  
    Trials.imagePair)
   Data: looking_data_first_appearance

REML criterion at convergence: 5484.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.8970 -0.7264 -0.0091  0.7935  1.8791 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.003995 0.06321 
 Trials.targetImage (Intercept) 0.015223 0.12338 
 Trials.imagePair   (Intercept) 0.000000 0.00000 
 Residual                       0.977207 0.98854 
Number of obs: 1936, groups:  
SubjectInfo.subjID, 91; Trials.targetImage, 24; Trials.imagePair, 16

Fixed effects:
                         Estimate Std. Error        df t value Pr(>|t|)  
(Intercept)              0.001781   0.034434 22.555797   0.052   0.9592  
scale(MeanSaliencyDiff)  0.070894   0.031748 34.942005   2.233   0.0321 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
scl(MnSlnD) 0.004 
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')

multimodal similarity measures

clip_data <- read.csv(here("analysis", "ccn2025", "clip_data.csv"))
clip_data_multimodal <- clip_data |>
  mutate(
    # Split into two numbers (target, distractor)
    similarity_split = str_split(multimodal_similarity, ","),
    multimodal_similarity_target = sapply(similarity_split, function(x) as.numeric(x[1])) / 100,
    multimodal_similarity_distractor = sapply(similarity_split, function(x) as.numeric(x[2])) / 100,
    multimodal_similarity_luce = multimodal_similarity_distractor / 
                                 (multimodal_similarity_target + multimodal_similarity_distractor)
  ) |>
  select(-similarity_split)
looking_data_summarized_with_mm <- looking_data_summarized |> 
      mutate(age_in_months = SubjectInfo.testAge / 30) |> left_join(clip_data_multimodal |> transmute(Trials.targetImage = target, Trials.distractorImage = distractor, across(starts_with("multimodal_similarity_"))))
Joining with `by = join_by(Trials.targetImage, Trials.distractorImage)`

Distractor image to target word

summary(lmer(
    scale(corrected_target_looking) ~ 
      scale(multimodal_similarity_distractor) * scale(age_in_months) + 
      scale(AoA_Est_target) + 
      scale(MeanSaliencyDiff) +
      (1 | SubjectInfo.subjID) +
      (1 | Trials.targetImage),
    data = looking_data_summarized_with_mm |> 
      mutate(age_in_months = SubjectInfo.testAge / 30)))
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(corrected_target_looking) ~ scale(multimodal_similarity_distractor) *  
    scale(age_in_months) + scale(AoA_Est_target) + scale(MeanSaliencyDiff) +  
    (1 | SubjectInfo.subjID) + (1 | Trials.targetImage)
   Data: 
mutate(looking_data_summarized_with_mm, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 7011.7

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.82417 -0.63158 -0.02055  0.66346  2.78013 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.017141 0.13092 
 Trials.targetImage (Intercept) 0.009134 0.09557 
 Residual                       0.962883 0.98127 
Number of obs: 2476, groups:  SubjectInfo.subjID, 91; Trials.targetImage, 24

Fixed effects:
                                                               Estimate
(Intercept)                                                  -8.176e-03
scale(multimodal_similarity_distractor)                      -4.134e-02
scale(age_in_months)                                          5.818e-02
scale(AoA_Est_target)                                        -8.374e-02
scale(MeanSaliencyDiff)                                      -1.157e-03
scale(multimodal_similarity_distractor):scale(age_in_months)  1.274e-03
                                                             Std. Error
(Intercept)                                                   3.140e-02
scale(multimodal_similarity_distractor)                       2.411e-02
scale(age_in_months)                                          2.405e-02
scale(AoA_Est_target)                                         2.799e-02
scale(MeanSaliencyDiff)                                       2.617e-02
scale(multimodal_similarity_distractor):scale(age_in_months)  1.962e-02
                                                                     df t value
(Intercept)                                                   2.704e+01  -0.260
scale(multimodal_similarity_distractor)                       9.900e+01  -1.714
scale(age_in_months)                                          9.102e+01   2.419
scale(AoA_Est_target)                                         2.223e+01  -2.991
scale(MeanSaliencyDiff)                                       3.678e+01  -0.044
scale(multimodal_similarity_distractor):scale(age_in_months)  2.388e+03   0.065
                                                             Pr(>|t|)   
(Intercept)                                                   0.79655   
scale(multimodal_similarity_distractor)                       0.08960 . 
scale(age_in_months)                                          0.01756 * 
scale(AoA_Est_target)                                         0.00668 **
scale(MeanSaliencyDiff)                                       0.96499   
scale(multimodal_similarity_distractor):scale(age_in_months)  0.94823   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(m__) scl(g__) s(AA_E s(MSD)
scl(mltm__) -0.008                                
scl(g_n_mn)  0.003 -0.009                         
scl(AA_Es_)  0.011 -0.194    0.011                
scl(MnSlnD)  0.019  0.063   -0.010    0.014       
sc(__):(__) -0.001 -0.002    0.006    0.012 -0.006

Luce’s ratio

summary(lmer(
    scale(corrected_target_looking) ~ 
      scale(multimodal_similarity_luce) * scale(age_in_months) + 
      scale(AoA_Est_target) + 
      scale(MeanSaliencyDiff) +
      (1 | SubjectInfo.subjID) +
      (1 | Trials.targetImage),
    data = looking_data_summarized_with_mm |> 
      mutate(age_in_months = SubjectInfo.testAge / 30)))
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: scale(corrected_target_looking) ~ scale(multimodal_similarity_luce) *  
    scale(age_in_months) + scale(AoA_Est_target) + scale(MeanSaliencyDiff) +  
    (1 | SubjectInfo.subjID) + (1 | Trials.targetImage)
   Data: 
mutate(looking_data_summarized_with_mm, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 7013.4

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.82392 -0.62764 -0.01942  0.67034  2.79055 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.01709  0.1307  
 Trials.targetImage (Intercept) 0.01025  0.1012  
 Residual                       0.96315  0.9814  
Number of obs: 2476, groups:  SubjectInfo.subjID, 91; Trials.targetImage, 24

Fixed effects:
                                                         Estimate Std. Error
(Intercept)                                            -8.673e-03  3.215e-02
scale(multimodal_similarity_luce)                      -2.700e-02  2.551e-02
scale(age_in_months)                                    5.808e-02  2.404e-02
scale(AoA_Est_target)                                  -9.044e-02  2.833e-02
scale(MeanSaliencyDiff)                                -3.081e-04  2.676e-02
scale(multimodal_similarity_luce):scale(age_in_months)  4.269e-03  1.958e-02
                                                               df t value
(Intercept)                                             2.649e+01  -0.270
scale(multimodal_similarity_luce)                       5.769e+01  -1.059
scale(age_in_months)                                    9.106e+01   2.416
scale(AoA_Est_target)                                   2.143e+01  -3.193
scale(MeanSaliencyDiff)                                 3.743e+01  -0.012
scale(multimodal_similarity_luce):scale(age_in_months)  2.385e+03   0.218
                                                       Pr(>|t|)   
(Intercept)                                              0.7894   
scale(multimodal_similarity_luce)                        0.2942   
scale(age_in_months)                                     0.0177 * 
scale(AoA_Est_target)                                    0.0043 **
scale(MeanSaliencyDiff)                                  0.9909   
scale(multimodal_similarity_luce):scale(age_in_months)   0.8275   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(m__) scl(g__) s(AA_E s(MSD)
scl(mltm__) -0.008                                
scl(g_n_mn)  0.003 -0.008                         
scl(AA_Es_)  0.011 -0.080    0.009                
scl(MnSlnD)  0.020  0.074   -0.010    0.021       
sc(__):(__) -0.002 -0.005    0.010    0.010 -0.003

OpenCLIP

OpenCLIP training

openclip_similarities_combined <- openclip_similarities |>
  rename(word_a = word1, word_b = word2) |>
  bind_rows(
    openclip_similarities |>
      rename(word_a = word2, word_b = word1)
  )
looking_data_w_openclip <- looking_data_summarized |>
  select(-text_similarity, -multimodal_similarity, -image_similarity) |>
  left_join(openclip_similarities_combined, by = c("Trials.distractorImage"="word_a", "Trials.targetImage"="word_b"))
Warning in left_join(select(looking_data_summarized, -text_similarity, -multimodal_similarity, : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 6 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
common_cols <- intersect(colnames(looking_data_w_openclip), colnames(looking_data_summarized))

looking_data_w_openclip_clip <- looking_data_w_openclip |> bind_rows(
    looking_data_summarized %>%
      mutate(epoch = 33) %>%
      select(all_of(common_cols), epoch)
  )


openclip_data_summarized <- summarize_similarity_data(looking_data_w_openclip, extra_fields=c("epoch"))
openclip_clip_data_summarized <- summarize_similarity_data(looking_data_w_openclip_clip, extra_fields=c("epoch"))
openclip_age_half_summarized <- looking_data_w_openclip |>
  add_age_split() |>
  summarize_similarity_data(extra_fields = c("age_half", "epoch"))

# Function to calculate Pearson's correlation per epoch
calculate_correlations <- function(data, x_var, y_var, group_var = c("epoch"), conf_level = 0.95) {
  data |>
    group_by(across(all_of(group_var))) |>
    summarize(
      {
        cor_test <- cor.test(.data[[x_var]], .data[[y_var]], method = "pearson", conf.level = conf_level)
        tibble(
          pearson_cor = cor_test$estimate,
          p_value = cor_test$p.value,
          ci_lower = cor_test$conf.int[1],
          ci_upper = cor_test$conf.int[2]
        )
      },
      .groups = "drop"
    )
}

image_correlation_results_age_split <- calculate_correlations(openclip_age_half_summarized, "image_similarity", "mean_value", group_var=c("epoch", "age_half"))

text_correlation_results <- calculate_correlations(openclip_data_summarized, "text_similarity", "mean_value", group_var=c("epoch")) %>%
  mutate(similarity_type = "Text similarity")
image_correlation_results <- calculate_correlations(openclip_data_summarized, "image_similarity", "mean_value", group_var=c("epoch")) %>%
  mutate(similarity_type = "Image similarity")

combined_correlation_results <- bind_rows(text_correlation_results, image_correlation_results)

CCN plot

oc_plot <- ggplot(combined_correlation_results |> 
         rowwise() |> 
         mutate(epoch = unique_checkpoints[[epoch]]), 
       aes(x = log(epoch), y = pearson_cor, color = similarity_type)) +
  #geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.1) +
  #geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") +
  geom_point(size = 7, alpha = 0.8) +
  geom_smooth(alpha = 0.3, size = 0, span = 1, show.legend = FALSE) +  
  stat_smooth(geom = "line", alpha = 0.8, size = 1.5, span = 1, show.legend = FALSE) + 
  labs(x = "log (OpenCLIP epoch)",
       y = "Pearson's\ncorrelation (r)",
       color = "Similarity Type") +
  scale_color_manual(values = c(
    "Text similarity" = "#B8481C",           
    "Image similarity" = "#215D89"
  )) +  
   scale_fill_manual(values = c(
    "Text similarity" = "#B8481C",           
    "Image similarity" = "#215D89"
  )) +  
  scale_shape_manual(values = c(`TRUE` = 16, `FALSE` = 1)) + 
  theme(
    text = element_text(size=26, face="bold"),
    legend.position = c(0.25, 0.8),  # Adjust x/y values as needed
    legend.background = element_rect(fill = "white", color = "gray80"),
    legend.key = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(size = 26, face = "bold"),
    axis.title = element_text(size = 28),
    axis.title.x = element_text(
      face = "bold", 
      size = 33,
      margin = margin(t = 15, r = 0, b = 0, l = 0)
    ),
    axis.title.y = element_text(
      face = "bold", 
      size = 33,
      margin = margin(t = 0, r = 10, b = 0, l = 0)
    ),
    axis.text = element_text(size = 30, face = "bold")
  ) 
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.
oc_plot
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: No shared levels found between `names(values)` of the manual scale and the
data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and the
data's shape values.

ggsave(here("figures",PROJECT_VERSION,"oc_plot_poster.svg"),oc_plot,width=10, height=5,bg = "white",device="pdf")
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's shape values.
# Function to calculate which trials correlate most/least with fitted regression lines
calculate_trial_fitted_correlations <- function(data, x_var, y_var, group_var = c("epoch")) {
  
  # Calculate fitted values from regression for each group
  fitted_data <- data |>
    group_by(across(all_of(group_var))) |>
    do({
      model <- lm(as.formula(paste(y_var, "~", x_var)), data = .)
      . |>
        mutate(fitted_value = predict(model))
    }) |>
    ungroup()
  
  # Calculate residuals for each trial (how far each trial deviates from fitted line)
  trial_residuals <- fitted_data |>
    mutate(residual = .data[[y_var]] - fitted_value,
           abs_residual = abs(residual)) |>
    group_by(across(c(all_of(group_var), "Trials.trialID"))) |>
    summarize(
      mean_residual = mean(residual, na.rm = TRUE),
      mean_abs_residual = mean(abs_residual, na.rm = TRUE),
      # Convert to correlation-like metric (negative residual = better fit)
      correlation_with_fitted = -mean_abs_residual, # negative so higher = better fit
      n_points = n(),
      .groups = "drop"
    )
  
  return(trial_residuals)
}

# Function to find most and least correlated trials
find_extreme_trial_correlations <- function(trial_cors, group_var = c("epoch")) {
  trial_cors |>
    group_by(across(all_of(group_var))) |>
    filter(!is.na(correlation_with_fitted)) |>
    reframe(
      most_correlated_trial = Trials.trialID[which.max(correlation_with_fitted)],
      highest_correlation = max(correlation_with_fitted, na.rm = TRUE),
      highest_correlation_abs = -min(mean_abs_residual, na.rm = TRUE), # best fit (smallest residual)
      least_correlated_trial = Trials.trialID[which.min(correlation_with_fitted)],
      lowest_correlation = min(correlation_with_fitted, na.rm = TRUE), 
      lowest_correlation_abs = -max(mean_abs_residual, na.rm = TRUE), # worst fit (largest residual)
      n_valid_trials = n()
    )
}

# Calculate for TEXT SIMILARITY by epoch
text_trial_correlations_by_epoch <- calculate_trial_fitted_correlations(
  openclip_data_summarized, 
  "text_similarity", 
  "mean_value", 
  group_var = c("epoch")
)

text_extreme_by_epoch <- find_extreme_trial_correlations(
  text_trial_correlations_by_epoch, 
  group_var = c("epoch")
) |>
  mutate(similarity_type = "Text similarity")

# Calculate for IMAGE SIMILARITY by epoch  
image_trial_correlations_by_epoch <- calculate_trial_fitted_correlations(
  openclip_data_summarized, 
  "image_similarity", 
  "mean_value", 
  group_var = c("epoch")
)

image_extreme_by_epoch <- find_extreme_trial_correlations(
  image_trial_correlations_by_epoch, 
  group_var = c("epoch")
) |>
  mutate(similarity_type = "Image similarity")

# Calculate OVERALL across all epochs (averaged across training)
# For text similarity - average each trial's values across epochs first
text_trial_averages <- openclip_data_summarized |>
  group_by(Trials.trialID) |>
  summarize(
    mean_text_similarity = mean(text_similarity, na.rm = TRUE),
    mean_value_avg = mean(mean_value, na.rm = TRUE),
    .groups = "drop"
  )

# Fit overall regression model for text
text_overall_model <- lm(mean_value_avg ~ mean_text_similarity, data = text_trial_averages)
text_trial_averages$fitted_value <- predict(text_overall_model)

# Calculate residuals for overall text
text_overall_correlations <- text_trial_averages |>
  mutate(
    residual = mean_value_avg - fitted_value,
    abs_residual = abs(residual),
    correlation_with_fitted = -abs_residual # negative so higher = better fit
  ) |>
  reframe(
    most_correlated_trial = Trials.trialID[which.max(correlation_with_fitted)],
    highest_correlation = max(correlation_with_fitted),
    highest_correlation_abs = max(correlation_with_fitted),
    least_correlated_trial = Trials.trialID[which.min(correlation_with_fitted)],
    lowest_correlation = min(correlation_with_fitted),
    lowest_correlation_abs = min(correlation_with_fitted),
    n_valid_trials = n(),
    epoch = "Overall",
    similarity_type = "Text similarity"
  )

# Same for image similarity overall
image_trial_averages <- openclip_data_summarized |>
  group_by(Trials.trialID) |>
  summarize(
    mean_image_similarity = mean(image_similarity, na.rm = TRUE), 
    mean_value_avg = mean(mean_value, na.rm = TRUE),
    .groups = "drop"
  )

image_overall_model <- lm(mean_value_avg ~ mean_image_similarity, data = image_trial_averages)
image_trial_averages$fitted_value <- predict(image_overall_model)

image_overall_correlations <- image_trial_averages |>
  mutate(
    residual = mean_value_avg - fitted_value,
    abs_residual = abs(residual),
    correlation_with_fitted = -abs_residual
  ) |>
  reframe(
    most_correlated_trial = Trials.trialID[which.max(correlation_with_fitted)],
    highest_correlation = max(correlation_with_fitted),
    highest_correlation_abs = max(correlation_with_fitted),
    least_correlated_trial = Trials.trialID[which.min(correlation_with_fitted)],
    lowest_correlation = min(correlation_with_fitted),
    lowest_correlation_abs = min(correlation_with_fitted),
    n_valid_trials = n(),
    epoch = "Overall",
    similarity_type = "Image similarity"
  )

# Combine all extreme results (convert epoch to character for consistency)
all_extreme_correlations <- bind_rows(
  text_extreme_by_epoch |> mutate(epoch = as.character(epoch)),
  image_extreme_by_epoch |> mutate(epoch = as.character(epoch)), 
  text_overall_correlations,
  image_overall_correlations
) |>
  arrange(similarity_type, epoch)

# Also get all trial correlations (not just extremes)
all_text_trial_correlations <- bind_rows(
  text_trial_correlations_by_epoch |> mutate(epoch = as.character(epoch), similarity_type = "Text similarity"),
  text_trial_averages |> 
    mutate(
      residual = mean_value_avg - fitted_value,
      abs_residual = abs(residual),
      correlation_with_fitted = -abs_residual,
      epoch = "Overall",
      similarity_type = "Text similarity",
      n_points = 1
    ) |>
    select(Trials.trialID, correlation_with_fitted, n_points, epoch, similarity_type)
)

all_image_trial_correlations <- bind_rows(
  image_trial_correlations_by_epoch |> mutate(epoch = as.character(epoch), similarity_type = "Image similarity"),
  image_trial_averages |>
    mutate(
      residual = mean_value_avg - fitted_value, 
      abs_residual = abs(residual),
      correlation_with_fitted = -abs_residual,
      epoch = "Overall",
      similarity_type = "Image similarity",
      n_points = 1
    ) |>
    select(Trials.trialID, correlation_with_fitted, n_points, epoch, similarity_type)
)

# Combine all individual trial correlations
all_trial_correlations <- bind_rows(
  all_text_trial_correlations,
  all_image_trial_correlations
) |>
  arrange(similarity_type, epoch, Trials.trialID)

# Display results
print("SUMMARY: Trials that correlate most and least with fitted regression lines:")
[1] "SUMMARY: Trials that correlate most and least with fitted regression lines:"
print(all_extreme_correlations)
# A tibble: 66 × 9
   epoch most_correlated_trial        highest_correlation highest_correlation_…¹
   <chr> <chr>                                      <dbl>                  <dbl>
 1 1     hard-turkey-goat-distractor            -0.000804              -0.000804
 2 10    easy-turkey-swan                       -0.00377               -0.00377 
 3 11    hard-cheese-butter                     -0.000172              -0.000172
 4 12    easy-turkey-swan                       -0.00175               -0.00175 
 5 13    hard-turtle-frog-distractor            -0.00537               -0.00537 
 6 14    easy-turkey-swan                       -0.000443              -0.000443
 7 15    hard-cheese-butter                     -0.00132               -0.00132 
 8 16    hard-turkey-goat-distractor            -0.000686              -0.000686
 9 17    hard-cheese-butter-distract…           -0.000574              -0.000574
10 18    hard-cheese-butter                     -0.00338               -0.00338 
# ℹ 56 more rows
# ℹ abbreviated name: ¹​highest_correlation_abs
# ℹ 5 more variables: least_correlated_trial <chr>, lowest_correlation <dbl>,
#   lowest_correlation_abs <dbl>, n_valid_trials <int>, similarity_type <chr>
print("\nDETAILED: All trial correlations with fitted lines (first 20 rows):")
[1] "\nDETAILED: All trial correlations with fitted lines (first 20 rows):"
print(head(all_trial_correlations, 20))
# A tibble: 20 × 7
   epoch Trials.trialID   mean_residual mean_abs_residual correlation_with_fit…¹
   <chr> <chr>                    <dbl>             <dbl>                  <dbl>
 1 1     easy-acorn-key        -0.00601           0.00601               -0.00601
 2 1     easy-acorn-key-…       0.0546            0.0546                -0.0546 
 3 1     easy-bulldozer-…      -0.110             0.110                 -0.110  
 4 1     easy-bulldozer-…       0.0710            0.0710                -0.0710 
 5 1     easy-cheese-mud        0.0772            0.0772                -0.0772 
 6 1     easy-cheese-mud…      -0.0827            0.0827                -0.0827 
 7 1     easy-potato-gla…       0.0861            0.0861                -0.0861 
 8 1     easy-potato-gla…       0.0962            0.0962                -0.0962 
 9 1     easy-snail-cow        -0.0445            0.0445                -0.0445 
10 1     easy-snail-cow-…      -0.00473           0.00473               -0.00473
11 1     easy-squirrel-e…       0.0880            0.0880                -0.0880 
12 1     easy-squirrel-e…      -0.0171            0.0171                -0.0171 
13 1     easy-turkey-swan      -0.0220            0.0220                -0.0220 
14 1     easy-turkey-swa…      -0.0674            0.0674                -0.0674 
15 1     easy-turtle-hor…      -0.0303            0.0303                -0.0303 
16 1     easy-turtle-hor…       0.0341            0.0341                -0.0341 
17 1     hard-acorn-coco…      -0.0562            0.0562                -0.0562 
18 1     hard-acorn-coco…      -0.0614            0.0614                -0.0614 
19 1     hard-bulldozer-…      -0.0432            0.0432                -0.0432 
20 1     hard-bulldozer-…      -0.00998           0.00998               -0.00998
# ℹ abbreviated name: ¹​correlation_with_fitted
# ℹ 2 more variables: n_points <dbl>, similarity_type <chr>
print(paste("\nTotal number of trial-epoch-similarity combinations:", nrow(all_trial_correlations)))
[1] "\nTotal number of trial-epoch-similarity combinations: 2112"
# Show summary statistics
print("\nSummary statistics for trial correlations:")
[1] "\nSummary statistics for trial correlations:"
summary_stats <- all_trial_correlations |>
  group_by(similarity_type, epoch) |>
  summarize(
    n_trials = n(),
    mean_correlation = mean(correlation_with_fitted, na.rm = TRUE),
    median_correlation = median(correlation_with_fitted, na.rm = TRUE),
    sd_correlation = sd(correlation_with_fitted, na.rm = TRUE),
    .groups = "drop"
  )
print(summary_stats)
# A tibble: 66 × 6
   similarity_type  epoch n_trials mean_correlation median_correlation
   <chr>            <chr>    <int>            <dbl>              <dbl>
 1 Image similarity 1           32          -0.0511            -0.0509
 2 Image similarity 10          32          -0.0488            -0.0460
 3 Image similarity 11          32          -0.0501            -0.0470
 4 Image similarity 12          32          -0.0487            -0.0483
 5 Image similarity 13          32          -0.0526            -0.0548
 6 Image similarity 14          32          -0.0508            -0.0470
 7 Image similarity 15          32          -0.0504            -0.0509
 8 Image similarity 16          32          -0.0505            -0.0525
 9 Image similarity 17          32          -0.0483            -0.0461
10 Image similarity 18          32          -0.0516            -0.0519
# ℹ 56 more rows
# ℹ 1 more variable: sd_correlation <dbl>
correlation_plot_faceted <- all_trial_correlations |>
  filter(epoch == "Overall") |>
  # Order by correlation within each similarity type
  group_by(similarity_type) |>
  arrange(desc(correlation_with_fitted), .by_group = TRUE) |>
  mutate(trial_order = row_number()) |>
  ungroup() |>
  ggplot(aes(x = reorder(Trials.trialID, correlation_with_fitted), y = correlation_with_fitted)) +
  geom_col(aes(fill = similarity_type), show.legend = FALSE) +
  facet_wrap(~ similarity_type, scales = "free_x", ncol = 1) +
  scale_fill_manual(values = c("Text similarity" = "steelblue", "Image similarity" = "coral")) +
  labs(
    title = "Trial Correlations with Fitted Regression Lines (Overall)", 
    subtitle = "Separate panels for Text and Image similarity",
    x = "Trial ID",
    y = "Correlation with Fitted Line"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    strip.text = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 14, face = "bold")
  )

print(correlation_plot_faceted)

Comparing to random values

library(dplyr)
library(purrr)

get_randomized_correlation <- function(data, sim_type, group_var = c("epoch")) {
  randomized_values <- data %>%
    distinct(Trials.trialID, .keep_all = TRUE) %>%
    mutate(shuffled_value = sample(mean_value)) %>%
    select(Trials.trialID, shuffled_value)
  randomized_data <- data %>%
    left_join(randomized_values, by = "Trials.trialID") %>%
    mutate(mean_value = shuffled_value) %>%
    select(-shuffled_value)
  
  calculate_correlations(randomized_data, sim_type, "mean_value", group_var = group_var)
}

image_correlations_randomized <- get_randomized_correlation(openclip_data_summarized, "image_similarity") |>
  mutate(similarity_type = "Image randomized")

text_correlations_randomized <- get_randomized_correlation(openclip_data_summarized, "text_similarity") |>
  mutate(similarity_type = "Text randomized")

n_boot <- 100
boot_results <- map_dfr(1:n_boot, function(i) {
  image_corr <- get_randomized_correlation(openclip_data_summarized, "image_similarity") %>%
    mutate(similarity_type = "Image randomized bootstrapped", bootstrap = i)
  
  text_corr <- get_randomized_correlation(openclip_data_summarized, "text_similarity") %>%
    mutate(similarity_type = "Text randomized bootstrapped", bootstrap = i)
  
  bind_rows(image_corr, text_corr)
})
boot_summary <- boot_results %>%
  group_by(epoch, similarity_type) %>%
  summarize(
    pearson_cor = mean(pearson_cor, na.rm = TRUE),
    ci_lower = quantile(pearson_cor, 0.025, na.rm = TRUE),
    ci_upper = quantile(pearson_cor, 0.975, na.rm = TRUE),
    .groups = "drop"
  )

combined_correlation_results <- bind_rows(combined_correlation_results, image_correlations_randomized, text_correlations_randomized, boot_summary) %>%
  mutate(is_randomized = grepl("randomized", similarity_type) & !grepl("bootstrap", similarity_type))

ggplot(combined_correlation_results |> rowwise() |> mutate(epoch = unique_checkpoints[[epoch]]), aes(x = log(epoch), y = pearson_cor, color = similarity_type, alpha = is_randomized)) +
  geom_point(size = 3, aes(shape = !is.na(p_value) & p_value < 0.05)) +
  geom_smooth(span = 2, se = FALSE) +
  #geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.1) +
   geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") +
  labs(title = "Correlation Across Open-CLIP Training",
       x = "log (Epoch)",
       y = "Pearson correlation between\nbaseline-corrected prop. target looking\nand embedding similarity",
       color = "Similarity Type",
       shape = "p < 0.05") +
   scale_color_manual(values = c(
    "Text" = "#1f77b4",           # Default blue
    "Image" = "#215D89",          # Default orange
    "Image randomized bootstrapped" = "#ffbb78",           # Lighter orange
    "Text randomized bootstrapped" = "#1fa2b0",
    "Image randomized" = "#ffbb78",  
    "Text randomized" = "#1fa2b0"
  )) +
  scale_alpha_manual(values = c(`TRUE` = 0.3, `FALSE` = 1)) +
  guides(alpha = "none") +
  scale_shape_manual(values = c(`TRUE` = 16, `FALSE` = 1)) +  # Filled vs hollow dots for significance
  theme_minimal()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

It is unadvised to do model comparisons with such few items! However, interesting future direction.

checking out epoch 3 which had the highest correlation

still had to remove the random slope for image_similarity because of a singular fit but we definitely see a stronger effect

stopifnot(nrow(looking_data_w_openclip |> filter(epoch == 3)) == nrow(looking_data_summarized))
epoch3_data_summarized <- summarize_similarity_data(looking_data_w_openclip |> filter(epoch == 3))
openclip_data_long <-  epoch3_data_summarized |>  
  pivot_longer(cols = c("text_similarity", "image_similarity"), names_to = "sim_type", values_to = "similarity")
all_clip_plot <- multiple_similarity_effects_plot(openclip_data_long, "similarity", group_var="sim_type", input_title="Looking time and Open CLIP epoch 3 embedding correlations")
all_clip_plot
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

Comparing models from different epochs

library(MuMIn)
epoch3_image_effect <- fit_image_model(looking_data_w_openclip |> filter(epoch == 3))
summary(epoch3_image_effect)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(corrected_target_looking) ~ scale(image_similarity) * scale(age_in_months) +  
    scale(AoA_Est_target) + scale(MeanSaliencyDiff) + (1 | SubjectInfo.subjID) +  
    (1 | Trials.targetImage)
   Data: mutate(summary_data, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 7005.6

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.81337 -0.62233 -0.01623  0.66566  2.78508 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.017035 0.13052 
 Trials.targetImage (Intercept) 0.007234 0.08506 
 Residual                       0.961386 0.98050 
Number of obs: 2476, groups:  SubjectInfo.subjID, 91; Trials.targetImage, 24

Fixed effects:
                                               Estimate Std. Error         df
(Intercept)                                  -7.377e-03  3.003e-02  2.768e+01
scale(image_similarity)                      -5.637e-02  2.317e-02  1.240e+02
scale(age_in_months)                          5.790e-02  2.402e-02  9.101e+01
scale(AoA_Est_target)                        -7.868e-02  2.680e-02  2.326e+01
scale(MeanSaliencyDiff)                       2.785e-03  2.507e-02  3.511e+01
scale(image_similarity):scale(age_in_months) -3.601e-02  1.977e-02  2.384e+03
                                             t value Pr(>|t|)   
(Intercept)                                   -0.246  0.80779   
scale(image_similarity)                       -2.433  0.01641 * 
scale(age_in_months)                           2.411  0.01792 * 
scale(AoA_Est_target)                         -2.936  0.00737 **
scale(MeanSaliencyDiff)                        0.111  0.91216   
scale(image_similarity):scale(age_in_months)  -1.822  0.06864 . 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) sc(__) s(AA_E s(MSD)
scl(mg_sml) -0.004                            
scl(g_n_mn)  0.003 -0.009                     
scl(AA_Es_)  0.009 -0.232  0.011              
scl(MnSlnD)  0.018 -0.015 -0.009  0.027       
scl(_):(__) -0.004 -0.010  0.009  0.010 -0.007
epoch32_image_effect <- fit_image_model(looking_data_w_openclip |> filter(epoch == 28))
summary(epoch32_image_effect)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(corrected_target_looking) ~ scale(image_similarity) * scale(age_in_months) +  
    scale(AoA_Est_target) + scale(MeanSaliencyDiff) + (1 | SubjectInfo.subjID) +  
    (1 | Trials.targetImage)
   Data: mutate(summary_data, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 7010.1

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.82004 -0.62033 -0.01545  0.67083  2.79780 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.01717  0.1310  
 Trials.targetImage (Intercept) 0.01005  0.1002  
 Residual                       0.96185  0.9807  
Number of obs: 2476, groups:  SubjectInfo.subjID, 91; Trials.targetImage, 24

Fixed effects:
                                               Estimate Std. Error         df
(Intercept)                                  -8.310e-03  3.202e-02  2.745e+01
scale(image_similarity)                      -4.673e-02  2.525e-02  7.178e+01
scale(age_in_months)                          5.803e-02  2.405e-02  9.100e+01
scale(AoA_Est_target)                        -8.175e-02  2.874e-02  2.256e+01
scale(MeanSaliencyDiff)                       1.432e-03  2.658e-02  3.786e+01
scale(image_similarity):scale(age_in_months) -2.031e-02  1.967e-02  2.380e+03
                                             t value Pr(>|t|)   
(Intercept)                                   -0.260  0.79714   
scale(image_similarity)                       -1.851  0.06835 . 
scale(age_in_months)                           2.413  0.01783 * 
scale(AoA_Est_target)                         -2.845  0.00928 **
scale(MeanSaliencyDiff)                        0.054  0.95732   
scale(image_similarity):scale(age_in_months)  -1.032  0.30206   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) sc(__) s(AA_E s(MSD)
scl(mg_sml) -0.008                            
scl(g_n_mn)  0.003 -0.007                     
scl(AA_Es_)  0.012 -0.212  0.010              
scl(MnSlnD)  0.021  0.010 -0.009  0.025       
scl(_):(__) -0.003 -0.002  0.004  0.003 -0.006
fully_trained_clip_effect <- fit_image_model(looking_data_summarized)
summary(fully_trained_clip_effect)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(corrected_target_looking) ~ scale(image_similarity) * scale(age_in_months) +  
    scale(AoA_Est_target) + scale(MeanSaliencyDiff) + (1 | SubjectInfo.subjID) +  
    (1 | Trials.targetImage)
   Data: mutate(summary_data, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 7009.3

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.82154 -0.62402 -0.01811  0.67410  2.81296 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.017207 0.13118 
 Trials.targetImage (Intercept) 0.009785 0.09892 
 Residual                       0.961620 0.98062 
Number of obs: 2476, groups:  SubjectInfo.subjID, 91; Trials.targetImage, 24

Fixed effects:
                                               Estimate Std. Error         df
(Intercept)                                  -8.565e-03  3.185e-02  2.657e+01
scale(image_similarity)                      -4.591e-02  2.429e-02  1.146e+02
scale(age_in_months)                          5.786e-02  2.406e-02  9.098e+01
scale(AoA_Est_target)                        -7.913e-02  2.886e-02  2.245e+01
scale(MeanSaliencyDiff)                       6.125e-04  2.645e-02  3.581e+01
scale(image_similarity):scale(age_in_months) -2.562e-02  1.970e-02  2.385e+03
                                             t value Pr(>|t|)  
(Intercept)                                   -0.269   0.7901  
scale(image_similarity)                       -1.890   0.0612 .
scale(age_in_months)                           2.405   0.0182 *
scale(AoA_Est_target)                         -2.742   0.0118 *
scale(MeanSaliencyDiff)                        0.023   0.9817  
scale(image_similarity):scale(age_in_months)  -1.301   0.1935  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) sc(__) s(AA_E s(MSD)
scl(mg_sml) -0.004                            
scl(g_n_mn)  0.003 -0.005                     
scl(AA_Es_)  0.011 -0.255  0.010              
scl(MnSlnD)  0.021  0.029 -0.009  0.019       
scl(_):(__)  0.000 -0.008  0.006  0.003 -0.010

Plotting full lmer model values across epochs

library(dplyr)
library(MuMIn)
library(ggplot2)
library(patchwork)  # for combining plots

Attaching package: 'patchwork'
The following object is masked from 'package:cowplot':

    align_plots
# Define the epochs you want to analyze
epochs <- 1:33

# Initialize results storage
results <- data.frame(epoch = numeric(),
                      r2_marginal = numeric(),
                      corr_similarity_behavior = numeric())

common_cols <- intersect(colnames(looking_data_w_openclip), colnames(looking_data_summarized))

# Bind rows using only common columns, adding epoch = 33 to summarized data
looking_data_w_openclip_clip <- looking_data_w_openclip %>%
  bind_rows(
    looking_data_summarized %>%
      mutate(epoch = 33) %>%
      select(all_of(common_cols), epoch)
  )

# Loop over epochs
for (e in epochs) {
  df <- looking_data_w_openclip_clip |> filter(epoch == e)

  # Skip if not enough data
  if (nrow(df) < 10) next

  # Fit model (update to your exact model formula if needed)
  model <- fit_image_model(df)

  # Extract marginal R²
  r2 <- tryCatch(r.squaredGLMM(model)[1, "R2m"], error = function(e) NA)

  # Compute Pearson correlation between image similarity and behavior
 coefs <- summary(model)$coefficients
  p_val <- NA
  if ("scale(image_similarity)" %in% rownames(coefs)) {
    p_val <- coefs["scale(image_similarity)", "Pr(>|t|)"]
  }
  # Store results
  results <- rbind(results, data.frame(epoch = e,
                                       r2_marginal = r2,
                                       p_value = p_val))
}

# Plot R²
plot_r2 <- ggplot(results, aes(x = epoch, y = r2_marginal)) +
  geom_smooth(color = "steelblue", size = 1.2) +
  geom_point(size = 2) +
  labs(title = "Marginal R² of image similarity LMER vs. CLIP Epoch",
       x = "Epoch",
       y = "Marginal R²") +
  theme_minimal()

# Plot correlation
plot_corr <- ggplot(results, aes(x = epoch, y = p_value)) +
  geom_smooth(color = "firebrick", size = 1.2) +
  geom_point(size = 2) +
  geom_hline(yintercept = 0.05, linetype = "dotted", color = "gray40") +
  annotate("text", x = max(results$epoch), y = 0.055, 
           label = "p = 0.05", hjust = 4, vjust = 0, size = 4, color = "gray40") +
  labs(title = "p-value of image similarity in full LMER vs. CLIP epoch",
       x = "Epoch",
       y = "p-value") +
  theme_minimal()

# Combine and display
plot_r2 / plot_corr
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Looking time across epochs

epoch_model <- lmer(
    scale(corrected_target_looking) ~ 
      scale(image_similarity) * scale(epoch) + 
      scale(age_in_months) +
      scale(AoA_Est_target) + 
      scale(MeanSaliencyDiff) +
      (1 | SubjectInfo.subjID) +
      (1 | Trials.targetImage) +
      (1 | Trials.imagePair),
    data = looking_data_w_openclip_clip |> 
      mutate(age_in_months = SubjectInfo.testAge / 30)
  )
summary(epoch_model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(corrected_target_looking) ~ scale(image_similarity) * scale(epoch) +  
    scale(age_in_months) + scale(AoA_Est_target) + scale(MeanSaliencyDiff) +  
    (1 | SubjectInfo.subjID) + (1 | Trials.targetImage) + (1 |  
    Trials.imagePair)
   Data: 
mutate(looking_data_w_openclip_clip, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 225194.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.0236 -0.6281 -0.0126  0.6838  3.1906 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.05218  0.2284  
 Trials.targetImage (Intercept) 0.02820  0.1679  
 Trials.imagePair   (Intercept) 0.01562  0.1250  
 Residual                       0.91538  0.9568  
Number of obs: 81708, groups:  
SubjectInfo.subjID, 91; Trials.targetImage, 24; Trials.imagePair, 16

Fixed effects:
                                       Estimate Std. Error         df t value
(Intercept)                          -1.973e-02  5.235e-02  4.227e+01  -0.377
scale(image_similarity)              -1.372e-03  6.505e-03  3.843e+04  -0.211
scale(epoch)                          2.719e-04  3.714e-03  8.051e+04   0.073
scale(age_in_months)                  5.515e-02  2.404e-02  8.906e+01   2.294
scale(AoA_Est_target)                -6.191e-02  3.391e-02  1.954e+01  -1.826
scale(MeanSaliencyDiff)               1.300e-02  2.628e-02  2.563e+01   0.494
scale(image_similarity):scale(epoch) -4.642e-04  3.581e-03  7.033e+04  -0.130
                                     Pr(>|t|)  
(Intercept)                            0.7082  
scale(image_similarity)                0.8330  
scale(epoch)                           0.9416  
scale(age_in_months)                   0.0241 *
scale(AoA_Est_target)                  0.0832 .
scale(MeanSaliencyDiff)                0.6252  
scale(image_similarity):scale(epoch)   0.8969  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) scl(p) sc(__) s(AA_E s(MSD)
scl(mg_sml) -0.016                                   
scale(epch)  0.000 -0.326                            
scl(g_n_mn)  0.006  0.000  0.000                     
scl(AA_Es_)  0.011 -0.001  0.000  0.000              
scl(MnSlnD)  0.023  0.010 -0.003  0.000 -0.025       
scl(mg_):() -0.023  0.669 -0.007  0.000 -0.001  0.008
r.squaredGLMM(epoch_model)
             R2m       R2c
[1,] 0.007045641 0.1013007

Additional models

create_model_plots <- function(input_similarities, median_age, name="CVCL") {
  similarities_combined <- input_similarities |>
    rename(word_a = word1, word_b = word2) |>
    bind_rows(
      input_similarities |>
        rename(word_a = word2, word_b = word1)
    )
  
  looking_data_w_model <- looking_data_summarized |>
    select(-text_similarity, -multimodal_similarity, -image_similarity) |>
    left_join(similarities_combined, by = c("Trials.distractorImage"="word_a", "Trials.targetImage"="word_b"))
  
  data_summarized <- summarize_similarity_data(looking_data_w_model)
  
  age_half_summarized <- looking_data_w_model |>
    add_age_split() |>
    summarize_similarity_data(extra_fields = c("age_half"))
  
  p <- generate_multimodal_plots(data_summarized, name)
  return(list(
    plot = p,
    data = looking_data_w_model
  ))
}

CVCL

cvcl_similarities
       word1   word2 image_similarity text_similarity multimodal_similarity
1     turkey    swan       0.17444706      0.02558377            0.13672301
2     turtle   horse       0.17161047      0.12650073            0.08804899
3     cheese     mud       0.12267336      0.04901435           -0.03633275
4   squirrel   eagle       0.22197309      0.17859630            0.20261563
5     potato glasses       0.15317366      0.07150085            0.11340694
6  bulldozer  orange      -0.02197811      0.02320719           -0.03731379
7      snail     cow      -0.02817563      0.03850634           -0.04224624
8      acorn     key       0.19834010      0.13790375            0.22495553
9     turkey    goat       0.15390062      0.25035784            0.15649948
10    turtle    frog       0.30299562     -0.05592079            0.02503634
11    cheese  butter       0.35650474      0.10390703            0.08898249
12  squirrel  monkey       0.34328401      0.04259083            0.20561886
13    potato     pot       0.06314112     -0.02086087            0.06438187
14 bulldozer tractor       0.43432832      0.10392722            0.32970104
15     snail    worm       0.07686698      0.13534546            0.05373919
16     acorn coconut       0.34766978      1.00000012            0.69287187
17    turkey    swan       0.17444706      0.02558377            0.13672301
18    turtle   horse       0.17161047      0.12650073            0.08804899
19    cheese     mud       0.12267336      0.04901435           -0.03633275
20  squirrel   eagle       0.22197309      0.17859630            0.20261563
21    potato glasses       0.15317366      0.07150085            0.11340694
22 bulldozer  orange      -0.02197811      0.02320719           -0.03731379
23     snail     cow      -0.02817563      0.03850634           -0.04224624
24     acorn     key       0.19834010      0.13790375            0.22495553
25    turkey    goat       0.15390062      0.25035784            0.15649948
26    turtle    frog       0.30299562     -0.05592079            0.02503634
27    cheese  butter       0.35650474      0.10390703            0.08898249
28  squirrel  monkey       0.34328401      0.04259083            0.20561886
29    potato     pot       0.06314112     -0.02086087            0.06438187
30 bulldozer tractor       0.43432832      0.10392722            0.32970104
31     snail    worm       0.07686698      0.13534546            0.05373919
32     acorn coconut       0.34766978      1.00000012            0.69287187
cvcl_summary <- create_model_plots(cvcl_similarities, name="CVCL", median_age=input_median_age)
Warning in left_join(select(looking_data_summarized, -text_similarity, -multimodal_similarity, : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 6 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
Registered S3 methods overwritten by 'broom':
  method        from 
  nobs.fitdistr MuMIn
  nobs.multinom MuMIn
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
Warning: ggrepel: 60 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 56 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 60 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
imagenet_vit_summary <- create_model_plots(imagenetvit_similarities, name="ImageNetVIT", median_age=input_median_age)
`geom_smooth()` using formula = 'y ~ x'
Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
saycamvit_summary <- create_model_plots(saycamvit_similarities, name="SayCamVIT",median_age=input_median_age)
`geom_smooth()` using formula = 'y ~ x'
Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
cvcl_model <- fit_image_model(cvcl_summary$data)
summary(cvcl_model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(corrected_target_looking) ~ scale(image_similarity) * scale(age_in_months) +  
    scale(AoA_Est_target) + scale(MeanSaliencyDiff) + (1 | SubjectInfo.subjID) +  
    (1 | Trials.targetImage)
   Data: mutate(summary_data, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 13912.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.92934 -0.62674 -0.02029  0.67233  2.95343 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.03494  0.1869  
 Trials.targetImage (Intercept) 0.01582  0.1258  
 Residual                       0.94062  0.9699  
Number of obs: 4952, groups:  SubjectInfo.subjID, 91; Trials.targetImage, 24

Fixed effects:
                                               Estimate Std. Error         df
(Intercept)                                  -1.464e-02  3.544e-02  3.817e+01
scale(image_similarity)                      -9.371e-03  2.048e-02  1.640e+02
scale(age_in_months)                          5.670e-02  2.393e-02  9.002e+01
scale(AoA_Est_target)                        -8.775e-02  2.880e-02  2.194e+01
scale(MeanSaliencyDiff)                       6.229e-03  2.403e-02  5.537e+01
scale(image_similarity):scale(age_in_months) -1.591e-02  1.376e-02  4.859e+03
                                             t value Pr(>|t|)   
(Intercept)                                   -0.413  0.68175   
scale(image_similarity)                       -0.458  0.64786   
scale(age_in_months)                           2.369  0.01997 * 
scale(AoA_Est_target)                         -3.047  0.00592 **
scale(MeanSaliencyDiff)                        0.259  0.79641   
scale(image_similarity):scale(age_in_months)  -1.157  0.24752   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) sc(__) s(AA_E s(MSD)
scl(mg_sml)  0.002                            
scl(g_n_mn)  0.005 -0.007                     
scl(AA_Es_)  0.012 -0.182  0.006              
scl(MnSlnD)  0.025  0.115 -0.007  0.012       
scl(_):(__) -0.001  0.003  0.010  0.003 -0.004
cvcl_summary$plot
Warning: ggrepel: 60 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 62 unlabeled data points (too many overlaps). Consider increasing max.overlaps
ggrepel: 62 unlabeled data points (too many overlaps). Consider increasing max.overlaps

SayCAM-VIT

saycamvit_model <- fit_image_model(saycamvit_summary$data)
summary(saycamvit_model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(corrected_target_looking) ~ scale(image_similarity) * scale(age_in_months) +  
    scale(AoA_Est_target) + scale(MeanSaliencyDiff) + (1 | SubjectInfo.subjID) +  
    (1 | Trials.targetImage)
   Data: mutate(summary_data, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 7007.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.8292 -0.6194 -0.0167  0.6710  2.8328 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.01728  0.1315  
 Trials.targetImage (Intercept) 0.01008  0.1004  
 Residual                       0.96066  0.9801  
Number of obs: 2476, groups:  SubjectInfo.subjID, 91; Trials.targetImage, 24

Fixed effects:
                                               Estimate Std. Error         df
(Intercept)                                  -9.100e-03  3.205e-02  2.673e+01
scale(image_similarity)                      -2.098e-02  2.573e-02  7.299e+01
scale(age_in_months)                          5.751e-02  2.407e-02  9.096e+01
scale(AoA_Est_target)                        -8.641e-02  2.926e-02  2.267e+01
scale(MeanSaliencyDiff)                       1.293e-03  2.659e-02  3.654e+01
scale(image_similarity):scale(age_in_months) -5.101e-02  1.976e-02  2.383e+03
                                             t value Pr(>|t|)   
(Intercept)                                   -0.284   0.7786   
scale(image_similarity)                       -0.815   0.4175   
scale(age_in_months)                           2.390   0.0189 * 
scale(AoA_Est_target)                         -2.953   0.0072 **
scale(MeanSaliencyDiff)                        0.049   0.9615   
scale(image_similarity):scale(age_in_months)  -2.582   0.0099 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) sc(__) s(AA_E s(MSD)
scl(mg_sml) -0.002                            
scl(g_n_mn)  0.003 -0.004                     
scl(AA_Es_)  0.010 -0.279  0.010              
scl(MnSlnD)  0.021  0.028 -0.009  0.018       
scl(_):(__)  0.003  0.008  0.006  0.000 -0.002
saycamvit_summary$plot
Warning: ggrepel: 29 unlabeled data points (too many overlaps). Consider
increasing max.overlaps

ImageNet-VIT

imagenet_vit_model <- fit_image_model(imagenet_vit_summary$data)
summary(imagenet_vit_model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(corrected_target_looking) ~ scale(image_similarity) * scale(age_in_months) +  
    scale(AoA_Est_target) + scale(MeanSaliencyDiff) + (1 | SubjectInfo.subjID) +  
    (1 | Trials.targetImage)
   Data: mutate(summary_data, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 7010.3

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.82125 -0.62190 -0.02172  0.66775  2.83401 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.01724  0.1313  
 Trials.targetImage (Intercept) 0.01031  0.1015  
 Residual                       0.96184  0.9807  
Number of obs: 2476, groups:  SubjectInfo.subjID, 91; Trials.targetImage, 24

Fixed effects:
                                               Estimate Std. Error         df
(Intercept)                                  -9.089e-03  3.220e-02  2.645e+01
scale(image_similarity)                      -1.220e-02  2.618e-02  4.980e+01
scale(age_in_months)                          5.747e-02  2.407e-02  9.098e+01
scale(AoA_Est_target)                        -9.012e-02  2.884e-02  2.157e+01
scale(MeanSaliencyDiff)                       1.361e-03  2.673e-02  3.600e+01
scale(image_similarity):scale(age_in_months) -3.926e-02  1.978e-02  2.384e+03
                                             t value Pr(>|t|)   
(Intercept)                                   -0.282  0.77998   
scale(image_similarity)                       -0.466  0.64320   
scale(age_in_months)                           2.388  0.01900 * 
scale(AoA_Est_target)                         -3.124  0.00501 **
scale(MeanSaliencyDiff)                        0.051  0.95968   
scale(image_similarity):scale(age_in_months)  -1.985  0.04728 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) sc(__) s(AA_E s(MSD)
scl(mg_sml) -0.002                            
scl(g_n_mn)  0.003 -0.003                     
scl(AA_Es_)  0.010 -0.199  0.009              
scl(MnSlnD)  0.021  0.039 -0.009  0.019       
scl(_):(__)  0.003  0.001  0.008 -0.002 -0.001
imagenet_vit_summary$plot
Warning: ggrepel: 30 unlabeled data points (too many overlaps). Consider
increasing max.overlaps

With both SayCAM VIT and ImageNet VIT we do see this interesting interaction between age and similarity, where the influence of similarity decreases with age.

comparing similarity values across our measures

library(dplyr)
library(GGally)
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
library(ggplot2)

# Join all similarity datasets with trial metadata
combined_data <- cvcl_similarities %>%
  # Join with imagenetvit similarities
  left_join(imagenetvit_similarities, 
            by = c("word1", "word2"), 
            suffix = c("_cvcl", "_imagenetvit")) %>%
  # Join with saycamvit similarities  
  left_join(saycamvit_similarities, 
            by = c("word1", "word2")) %>%
  rename(image_similarity_saycamvit = image_similarity) %>%
  # Join with openclip epoch 32
  left_join(openclip_similarities %>% filter(epoch == 32), 
            by = c("word1", "word2")) %>%
  rename(image_similarity_openclip32 = image_similarity) %>%
  # Join with openclip epoch 3
  left_join(openclip_similarities %>% filter(epoch == 3), 
            by = c("word1", "word2")) %>%
  rename(image_similarity_openclip3 = image_similarity) %>%
  
  # Join with trial metadata to get saliency diff and clip_sim
  left_join(trial_metadata %>% 
            distinct(Trials.imagePair, .keep_all = TRUE) %>%
            # Create word1/word2 columns from target/distractor images
            mutate(word1 = Trials.targetImage,
                   word2 = Trials.distractorImage) %>%
            select(word1, word2, MeanSaliencyDiff, image_similarity),
            by = c("word1", "word2")) %>%
  rename(clip_sim = image_similarity) %>%
  distinct(word1, word2, .keep_all = TRUE) |>
  # Select and rename the six variables for visualization
  select(
    cvcl_sim = image_similarity_cvcl,
    imagenetvit_sim = image_similarity_imagenetvit, 
    saycamvit_sim = image_similarity_saycamvit,
    openclip_epoch3_sim = image_similarity_openclip3,
    openclip_epoch32_sim = image_similarity_openclip32,
    mean_saliency_diff = MeanSaliencyDiff,
    clip_sim = clip_sim,
    word1 = word1, 
    word2 = word2
  ) 

ggplot(combined_data, aes(x=clip_sim, y=openclip_epoch3_sim)) +
  geom_point(size=3)+
  ggrepel::geom_label_repel(aes(label=paste0(word1, "-", word2))) +
  ggpubr::stat_cor() +
  geom_smooth(method="lm")
Warning: Removed 8 rows containing non-finite outside the scale range
(`stat_cor()`).
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 8 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 8 rows containing missing values or values outside the scale range
(`geom_label_repel()`).

Correlation matrices

# Create ggpairs plot
similarity_plot <- ggpairs(
  combined_data,
  columns = c("cvcl_sim", "imagenetvit_sim", "saycamvit_sim", 
              "openclip_epoch3_sim", "openclip_epoch32_sim", 
              "mean_saliency_diff", "clip_sim"),
  title = "Correlations Between Different Similarity Measures")
ggsave("similarity_correlations.png", similarity_plot, 
       width = 16, height = 12, dpi = 300, bg = "white")
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning: Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 8 rows containing non-finite outside the scale range
(`stat_density()`).
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning: Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 8 rows containing non-finite outside the scale range
(`stat_density()`).
# Display the plot
print(similarity_plot)
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning: Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 8 rows containing non-finite outside the scale range
(`stat_density()`).
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning: Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 8 rows containing non-finite outside the scale range
(`stat_density()`).

# Print correlation matrix
cat("Correlation matrix:\n")
Correlation matrix:
cor_matrix <- cor(combined_data |> select(-word1, -word2), use = "complete.obs")
print(round(cor_matrix, 3))
                     cvcl_sim imagenetvit_sim saycamvit_sim openclip_epoch3_sim
cvcl_sim                1.000           0.775         0.791               0.599
imagenetvit_sim         0.775           1.000         0.919               0.401
saycamvit_sim           0.791           0.919         1.000               0.560
openclip_epoch3_sim     0.599           0.401         0.560               1.000
openclip_epoch32_sim    0.299           0.386         0.499               0.864
mean_saliency_diff      0.551           0.563         0.703               0.288
clip_sim                0.539           0.460         0.517               0.849
                     openclip_epoch32_sim mean_saliency_diff clip_sim
cvcl_sim                            0.299              0.551    0.539
imagenetvit_sim                     0.386              0.563    0.460
saycamvit_sim                       0.499              0.703    0.517
openclip_epoch3_sim                 0.864              0.288    0.849
openclip_epoch32_sim                1.000              0.050    0.858
mean_saliency_diff                  0.050              1.000    0.044
clip_sim                            0.858              0.044    1.000

DevBench

openclip_data <- read.csv(here("analysis", "ccn2025","openclip.csv"))
# Assuming looking_data_summarized is already loaded

trialid_level_summary <- target_looking_trial_level |> left_join(trial_metadata)
Joining with `by = join_by(Trials.trialID, AoA_Est_distractor, AoA_Est_target,
Trials.targetImage, Trials.distractorImage, Trials.imagePair)`
# Prepare human data for compare_lwl function
# The compare_lwl function expects human data with 'prop' and 'age_bin' columns
human_data_lwl <- trialid_level_summary %>%
  arrange(Trials.targetImage, Trials.distractorImage) %>%
  mutate(prop = mean_value + 0.5) %>%
  mutate(trial = row_number()) %>%
  transmute(prop, age_bin="14-24", trial)  # Add other columns as needed

# Function to compare model with human data
compare_lwl <- function(model_data, human_data) {
  model_data_correct <- model_data %>% 
    mutate(correct = image1 > image2)
  
  human_data_nested <- human_data %>% 
    rename(image1 = prop) %>% 
    mutate(image2 = 1 - image1) %>% 
    nest(data = -age_bin) |> 
    mutate(opt_kl = lapply(data, function(d) get_opt_kl(d, model_data_correct)),
           kl = sapply(opt_kl, function(r) r$objective),
           beta = sapply(opt_kl, function(r) r$solution),
           iters = sapply(opt_kl, function(r) r$iterations),
           accuracy = mean(model_data_correct$correct, na.rm = TRUE)) %>% 
    select(-opt_kl, -data)
  
  return(human_data_nested)
}

# Function to process model similarity data
process_model_data <- function(data, similarity_col, model_name, calc_kl=TRUE) {
  # Parse comma-separated similarity values
  similarity_pairs <- strsplit(data[[similarity_col]], ",")
  
  # Convert to numeric and create data frame
  res <- data.frame(
    image1 = sapply(similarity_pairs, function(x) as.numeric(trimws(x[1]))),
    image2 = sapply(similarity_pairs, function(x) as.numeric(trimws(x[2]))),
    trial = seq_along(similarity_pairs)
  )
  
  # Calculate accuracy (assuming image1 corresponds to target, image2 to distractor)
  acc <- res %>%
    mutate(correct = image1 > image2) %>%
    summarise(accuracy = mean(correct, na.rm = TRUE)) %>%
    pull(accuracy)
  if (calc_kl) {
      # Compare with human data
  kls <- compare_lwl(res, human_data_lwl) %>% 
    mutate(model = model_name,
           accuracy = acc)
  
  return(kls)
  } else {
    return (res)
  }

}

clip_results <- process_model_data(clip_data |> arrange(target, distractor) |> mutate(trial=row_number()), "multimodal_similarity", "CLIP")
clip_results
# A tibble: 1 × 6
  age_bin      kl   beta iters accuracy model
  <chr>     <dbl>  <dbl> <int>    <dbl> <chr>
1 14-24   0.00937 0.0251    75    0.969 CLIP 

openclip

openclip_data <- read.csv(here("analysis", "ccn2025","openclip.csv"))

openclip_div_lwl <- openclip_data %>%
    group_by(epoch) %>%
    group_split() %>%
    map_dfr(function(epoch_data) {
      current_epoch <- unique(epoch_data$epoch)
      
      # Process this epoch's data
      epoch_results <- process_model_data(epoch_data |> arrange(word1, word2), "multimodal_similarity", paste0("OpenCLIP_epoch_", current_epoch))
      
      # Add epoch information
      epoch_results %>%
        mutate(epoch = current_epoch)
    })


lwl_oc <- ggplot(openclip_div_lwl |> rowwise() |> mutate(epoch = unique_checkpoints[[epoch]]), 
                 aes(x = log(epoch), y = kl, col = as.factor(age_bin))) +
  theme_classic() +
  geom_point() +
  geom_smooth(span = 1) +#, method = "lm") +
  # scale_colour_continuous() +
  # theme_classic() +
  labs(x = "log(Epoch)",
       y = TeX("Model–human dissimilarity ($D^*_{KL}$)"),
       col = "Age") +
  guides(colour = guide_legend(position = "inside")) +
  coord_cartesian(ylim = c(0, 0.02)) +
  theme(legend.position.inside = c(0.9, 0.8)) 
lwl_oc
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

compare_lwl(human_data_lwl |> mutate(image1=prop, image2=1-prop), human_data_lwl)
# A tibble: 1 × 5
  age_bin         kl  beta iters accuracy
  <chr>        <dbl> <dbl> <int>    <dbl>
1 14-24   0.00000474  2.03    29    0.719

Quantifying multimodal accuracy?

multimodal_sim_vals <- cbind(trialid_level_summary |> arrange(Trials.targetImage, Trials.distractorImage), softmax_images(process_model_data(clip_data, "multimodal_similarity", "CLIP", calc_kl = FALSE), clip_results$beta) |> transmute(mm_acc = image1))

ggplot(multimodal_sim_vals |> filter(mm_acc > 0.5), aes(x=mm_acc, y=mean_value+0.5)) +
  theme_classic() +
  geom_point() +
  geom_smooth(method="lm") +
  ggpubr::stat_cor()
`geom_smooth()` using formula = 'y ~ x'

looking_data_summarized <- looking_data_summarized |> left_join(multimodal_sim_vals |> select(Trials.targetImage, Trials.distractorImage, mm_acc))
Joining with `by = join_by(Trials.targetImage, Trials.distractorImage)`

Thinking more about this, this is the wrong way of going about this since the beta is optimizing KL divergence and not accuracy per se.

mm_acc_model <- lmer(
    scale(corrected_target_looking+0.5) ~ 
      scale(mm_acc) * scale(age_in_months) + 
      scale(AoA_Est_target) + 
      scale(MeanSaliencyDiff) +
      (1 | SubjectInfo.subjID) +
      (1 | Trials.targetImage),
    data = looking_data_summarized |> 
      mutate(age_in_months = SubjectInfo.testAge / 30)
  )
summary(mm_acc_model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
scale(corrected_target_looking + 0.5) ~ scale(mm_acc) * scale(age_in_months) +  
    scale(AoA_Est_target) + scale(MeanSaliencyDiff) + (1 | SubjectInfo.subjID) +  
    (1 | Trials.targetImage)
   Data: 
mutate(looking_data_summarized, age_in_months = SubjectInfo.testAge/30)

REML criterion at convergence: 7013.1

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.85595 -0.62309 -0.01738  0.66754  2.82580 

Random effects:
 Groups             Name        Variance Std.Dev.
 SubjectInfo.subjID (Intercept) 0.017056 0.13060 
 Trials.targetImage (Intercept) 0.009987 0.09994 
 Residual                       0.963154 0.98140 
Number of obs: 2476, groups:  SubjectInfo.subjID, 91; Trials.targetImage, 24

Fixed effects:
                                     Estimate Std. Error         df t value
(Intercept)                        -8.531e-03  3.197e-02  2.618e+01  -0.267
scale(mm_acc)                      -1.052e-02  2.444e-02  8.285e+01  -0.430
scale(age_in_months)                5.779e-02  2.403e-02  9.108e+01   2.404
scale(AoA_Est_target)              -9.450e-02  2.829e-02  2.145e+01  -3.340
scale(MeanSaliencyDiff)             1.616e-03  2.657e-02  3.617e+01   0.061
scale(mm_acc):scale(age_in_months)  2.229e-02  1.974e-02  2.388e+03   1.129
                                   Pr(>|t|)   
(Intercept)                         0.79169   
scale(mm_acc)                       0.66804   
scale(age_in_months)                0.01823 * 
scale(AoA_Est_target)               0.00304 **
scale(MeanSaliencyDiff)             0.95182   
scale(mm_acc):scale(age_in_months)  0.25889   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) scl(_) sc(__) s(AA_E s(MSD)
scal(mm_cc) -0.018                            
scl(g_n_mn)  0.003 -0.001                     
scl(AA_Es_)  0.008  0.130  0.008              
scl(MnSlnD)  0.020  0.026 -0.009  0.030       
scl(_):(__)  0.001  0.007 -0.001  0.002  0.005

```