Setup

library(tidyverse)
library(lubridate)
library(ggthemes)
library(assertthat)
library(langcog) # for CIs
library(mirt)
library(ggpubr)
library(knitr)
library(dplyr)
library(here)
library(lme4)
library(lmerTest)

Load data from multiafc task

test_corpus <- read_csv(here::here("item_generation/4_create_multi_AFC_stimuli/new_test.csv")) %>%
  filter(!Word1 %in% c('honey','scrabble')) # manually excluded

test_clip_cor <- read_csv(here::here("item_generation/4_create_multi_AFC_stimuli/exp1_all_trials2023-04-11.csv")) %>%
  select(Word1, Word2, trial_type, cor, AoA_Est_Word2, AoA_Est_Word1) %>%
  rename(wordPairing = trial_type) %>%
  rename(clip_cor = cor) %>%
  filter(Word1 %in% unique(test_corpus$Word1)) %>%
  filter(!Word1 %in% c('honey','scrabble')) # manually excluded

Create data structure and merge that has clip cor =1 for target selection

test_clip_cor_identity <- test_clip_cor %>%
  select(Word1, AoA_Est_Word1) %>%
  distinct(Word1, AoA_Est_Word1) %>%
  mutate(Word2 = Word1) %>%
  mutate(AoA_Est_Word2 = AoA_Est_Word1) %>%
  mutate(clip_cor = 1) %>%
  mutate(wordPairing = 'target')

test_clip_cor <- test_clip_cor %>%
  full_join(test_clip_cor_identity) %>%
  arrange(Word1)

clip_to_join <- test_clip_cor %>%
  rename(targetWord = Word1) %>%
  rename(answerWord = Word2)

test_corpus <- test_corpus %>%
  left_join(test_clip_cor)

sum(is.na(test_corpus$clip_cor))

## [1] 0

bing = read_csv(here::here('data/validation_data/preprocessed_data/all_bing_data_for_item_info.csv'))  %>%
  mutate(age_group = as.numeric(floor(age_in_months/12))) %>%
  select(-block)

multi-afc adults

adults <- read_csv(here::here("data/data_multiafc/multi-afc-april28.csv"))  %>%
  mutate(age_group = as.numeric(25)) %>%
  select(-block)

# trial_data <- read_csv(here::here("data/data_multiafc/prod_roar-prod_tasks_vocab_raw_trials_081623.csv")) %>%

trial_data <- read_csv(here::here("data/rocketship_data/multi-afc-with-meta.csv")) %>% 
  select(-block) %>%
  mutate(age_group = as.numeric(age_group)) %>%
  filter(age_group<12) %>% # older kids just don't have enough trials
  full_join(bing) %>% # 3-5 year olds
  full_join(adults) %>%
  filter(studyId %in% c("school-multiAFC", "prolific-multiAFC", "validation-multiAFC"), completed == TRUE, task =="test_response") %>% 
  mutate(schoolId = ifelse(studyId == "school-multiAFC", "school", schoolId)) %>%
  mutate(options_clean = str_replace_all(options, "'", "\"")) # necessary for json parsing

Create functions for parsing options from nested json

parse_json_column <- function(json_col) {
  map(json_col, ~as.data.frame(t(unlist(jsonlite::fromJSON(.x, flatten = TRUE)))))
}

parsed_list <- parse_json_column(trial_data$options_clean)

# Combine the list of data frames into a single data frame
parsed_df <- bind_rows(parsed_list)

# Combine the parsed columns with the original data frame
final_df <- cbind(trial_data %>% select(-options), parsed_df)

Construct trial structure

df.multiAFC.trials <- final_df %>% 
  pivot_longer(cols = c("0", "1", "2", "3")) %>% 
  filter(answerWord == value) %>% 
  select(pid, runId, schoolId, answerWord, targetWord, numAFC, value, correct, rt, age_group) %>% 
  left_join(test_corpus %>% 
              rename(targetWord = "Word1", answerWord = "Word2")) %>%
  filter(is.na(itemGroup) | (itemGroup == "test")) %>% 
  mutate(wordPairing = ifelse(is.na(wordPairing), "target", wordPairing)) %>%
  filter(!targetWord %in%  c("ant","ball","bear","scrabble","honey")) %>% 
  select(pid, runId, schoolId, answerWord, targetWord, numAFC, value, correct, rt, wordPairing, age_group, clip_cor)

Create age/group and number of participants for filtering for plots / age-based analyses

age_group_pid <- df.multiAFC.trials %>%
  group_by(age_group) %>%
  summarize(num_pids = length(unique(pid)))

We have 1446 children aged 3-11 years, and 205 adults.

Need to write code to fill out response pattern for all possible target/distractor pairings…for use later.

full_item_structure_4AFC <- clip_to_join %>%
  mutate(numAFC = 4)

full_item_structure_3AFC <- clip_to_join %>%
  filter(wordPairing != 'distal') %>%
  mutate(numAFC = 3)

full_item_structure_2AFC <- clip_to_join %>%
  filter(!wordPairing %in% c('distal','easy')) %>%
  mutate(numAFC = 2)

full_item_structure <- full_item_structure_4AFC %>%
  full_join(full_item_structure_3AFC) %>%
  full_join(full_item_structure_2AFC)

Now fill this out by all ages in the dataset, oof

full_item_structure_by_age = map_df(age_group_pid$age_group, ~full_item_structure %>% mutate(age_group = .x))

Calculate proportion of times participants chose an image

First do this across all age groups

df.multiAFC.totalAttempts <- df.multiAFC.trials %>% 
  group_by(targetWord, numAFC) %>% 
  tally() %>% 
  dplyr::rename(totalAttempts = n)

df.multiAFC.distractor.summary <- df.multiAFC.trials %>% 
  group_by(targetWord,  answerWord, numAFC, wordPairing) %>% 
  tally() %>%
  left_join(df.multiAFC.totalAttempts) %>% 
  mutate(perc = n/totalAttempts) %>%
  full_join(full_item_structure) %>% # need to fill out item structure (but not by age)
  mutate(perc = replace_na(perc, replace=0))

hist(df.multiAFC.totalAttempts$totalAttempts)

target_pc <- df.multiAFC.distractor.summary %>%
  filter(wordPairing=='target')

hist(target_pc$perc)

Look at low/high accuracy items across all ages

low_acc_items <- target_pc %>%
  filter(perc<.5) %>%
  kable()

high_acc_items <- target_pc %>%
  filter(perc>.95) %>%
  kable()

Construct same dataframe but now by age group

df.multiAFC.totalAttempts.byAge <- df.multiAFC.trials %>% 
  group_by(targetWord, numAFC, age_group) %>% 
  tally() %>% 
  dplyr::rename(totalAttempts = n)

df.multiAFC.distractor.summary.byAge <- df.multiAFC.trials %>% 
  group_by(targetWord,  answerWord, numAFC, wordPairing, age_group) %>% 
  tally() %>% 
  left_join(df.multiAFC.totalAttempts.byAge) %>% 
  mutate(perc = n/totalAttempts) %>%
  full_join(full_item_structure_by_age)

couple of sanity checks for missing values that shuold be zero

assert_that(sum(is.na(df.multiAFC.distractor.summary.byAge$age_group))==0)

## [1] TRUE

assert_that(sum(is.na(df.multiAFC.distractor.summary.byAge$clip_cor))==0)

## [1] TRUE

There are some missing observations because these items were never chosen on a given trial/age group – need to fill those out

But we also have some target/distractor/age group pairings that are VERY sparsely populated –

hist(df.multiAFC.distractor.summary.byAge$totalAttempts)

For now, create “0” value in the percent chosen category

sum(is.na(df.multiAFC.distractor.summary.byAge$perc))

## [1] 2502

df.multiAFC.distractor.summary.byAge <- df.multiAFC.distractor.summary.byAge %>%
  mutate(perc = replace_na(perc, replace=0))

Calcualte summary by distractor type and numAFC by age, group, use confidence intervals here

df.multiAFC.distractor.summary.perc <- df.multiAFC.distractor.summary.byAge %>% 
  ungroup() %>%
  mutate(wordPairing = factor(wordPairing, levels = c('target','hard','easy','distal')))  %>% # just reordering
  group_by(age_group, numAFC, wordPairing) %>% 
  multi_boot_standard(col = 'perc')  %>% # compute CIs
  mutate(number_afc = as.factor(paste0(numAFC,' AFC'))) #cosmetic for plots

First visualize within each age group – can plots nAFCs by different colors

Looks like gradient is getting sharper with age, as expected

ggplot(data = df.multiAFC.distractor.summary.perc, aes(x=wordPairing, y=mean, col=number_afc)) +
  geom_point() +
  geom_linerange(aes(y=mean, ymax = ci_upper, ymin = ci_lower)) +
  geom_line(aes(group=number_afc)) +
  facet_wrap(~number_afc, nrow=3) +
  theme(aspect.ratio=.75) +
  theme_few() +
  ylab('Proportion images chosen') +
  facet_wrap(~age_group)

Plot gradient in selection across age within afc

Now plot within afc, with age groups as colors

ggplot(data = df.multiAFC.distractor.summary.perc %>% filter(age_group<25), aes(x=wordPairing, y=mean, col=age_group)) +
  geom_point(alpha=.8) +
  geom_point(data = df.multiAFC.distractor.summary.perc %>% filter(age_group==25), color='grey', alpha=.8) +
  geom_linerange(aes(y=mean, ymax = ci_upper, ymin = ci_lower), alpha=.3) +
  geom_linerange(data = df.multiAFC.distractor.summary.perc %>% filter(age_group==25), aes(y=mean, ymax = ci_upper, ymin = ci_lower), alpha=.3, color='grey') +
  geom_line(data = df.multiAFC.distractor.summary.perc %>% filter(age_group==25), color='grey',aes(group=age_group)) +
  geom_line(aes(group=age_group)) +
  facet_wrap(~number_afc) +
  theme(aspect.ratio=.75) +
  scale_color_viridis_c(name='Age (in years)') +
  xlab('') +
  ylab('Proportion chosen') +
  theme_few() +
  ylim(0,1) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

  # ggtitle('Proportion images chosen by # of distractors')

ggplot(data = df.multiAFC.distractor.summary.perc, aes(x=age_group, y=mean, col=wordPairing)) +
  geom_point() +
  geom_linerange(aes(y=mean, ymax = ci_upper, ymin = ci_lower), alpha=.2) +
  geom_smooth(aes(group=wordPairing), span=10) +
  facet_wrap(~number_afc) +
  theme(aspect.ratio=.75) +
  # scale_color_viridis_c(name='Age (in years)') +
  xlab('') +
  ylab('Proportion chosen') +
  theme_few() +
  ylim(0,1) +
  ggtitle('Proportion images chosen by # of distractors')

Plot item effects in order over all kids

df.multiAFC.distractor.summary.word <- df.multiAFC.distractor.summary %>% 
  group_by(numAFC, wordPairing, targetWord) %>% 
  summarise(percentile = mean(perc)) %>% 
  arrange(targetWord, numAFC) %>%
  ungroup() %>%
  mutate(targetWord = fct_reorder(targetWord, percentile))

We have a big range of item effects here – some near ceiling, some near floor, and we span the whole range

ggplot(data = df.multiAFC.distractor.summary.word, aes(x=targetWord, y=percentile, color=wordPairing)) +
  geom_point() +
  theme_few() + 
  theme(axis.text.x = element_text(size = 4, angle = 90, hjust = 1)) +
  facet_wrap(~numAFC)

Plotting individual item effects for the younger age groups is tricky…just not enough data from the youngest/oldest kids, who I’ve exlcuded here

df.multiAFC.distractor.summary.byAge <- df.multiAFC.distractor.summary.byAge %>% 
  group_by(targetWord) %>%
  mutate(avg_perc = mean(perc)) %>%
  ungroup() %>%
  mutate(targetWord = fct_reorder(targetWord, avg_perc))

ggplot(data = df.multiAFC.distractor.summary.byAge %>% filter(age_group>5 & age_group<12), aes(x=targetWord, y=perc, color=wordPairing)) +
  geom_point() +
  theme_few() + 
  theme(axis.text.x = element_text(size = 4, angle = 90, hjust = 1)) +
  facet_grid(numAFC~age_group)

Plot which items do vs. don’t follow the gradient

df.flag.pair <- df.multiAFC.distractor.summary.word %>% 
  pivot_wider(names_from = wordPairing, values_from = percentile) %>%
  filter((hard > target) | (easy > hard) | (easy > target)) %>% 
  add_column(flag = 1) %>% 
  select(numAFC, targetWord, flag)

df.multiAFC.distractor.summary.word.updated <- df.multiAFC.distractor.summary.word %>% 
  left_join(df.flag.pair) %>% 
  mutate(flag = ifelse(is.na(flag), "expected", "flag"))

hardest_items <- df.multiAFC.distractor.summary.word %>%
  group_by(targetWord) %>%
  filter(wordPairing=='target') %>%
  filter(numAFC==4) %>%
  arrange(percentile) %>%
  ungroup() %>%
  slice_max(order_by=-percentile, n=20)

hardest_items_wide <-  df.multiAFC.distractor.summary.word %>%
  filter(numAFC==4) %>%
  filter(targetWord %in% hardest_items$targetWord)  %>%
  arrange(targetWord)
  # pivot_wider(values_from = percentile, names_from = answerWord)

ggplot(data=df.multiAFC.distractor.summary.word.updated , aes(x=wordPairing, y=percentile)) +
  geom_point(size = 1, alpha = 0.6) + 
  geom_line(aes(group = targetWord, color = flag), alpha=.4) +
  facet_wrap(~numAFC) + 
  scale_color_manual(values=c( "#8F993E", "#E05A1D")) + 
  labs(x = "item distractor type",
       y = "percentage of responses")

Look at these individual items (n here is the number of items within afc that were flagged for being off)

df.multiAFC.distractor.summary.word.updated %>% filter(flag == "flag") %>% 
  group_by(targetWord) %>% 
  tally() %>%
  arrange(-n)

## # A tibble: 30 × 2
##    targetWord      n
##    <fct>       <int>
##  1 candlestick     9
##  2 sunflower       7
##  3 hedgehog        7
##  4 map             7
##  5 bamboo          7
##  6 sloth           7
##  7 pistachio       7
##  8 omelet          7
##  9 kimono          7
## 10 cymbal          7
## # ℹ 20 more rows

Make wide data structure for 4afc only

df.multiAFC.distractor.summary.wide <- df.multiAFC.distractor.summary %>%
  ungroup() %>% 
  filter(numAFC == 4) %>% 
  select(-c(n, answerWord)) %>% 
  pivot_wider(names_from = wordPairing, values_from = perc)

Look at specific items that are off (hard chosen less often than easy distractor)

df.multiAFC.distractor.summary.wide %>% 
  filter((hard + 0.1)< easy)

## # A tibble: 0 × 10
## # ℹ 10 variables: targetWord <chr>, numAFC <dbl>, totalAttempts <int>,
## #   clip_cor <dbl>, AoA_Est_Word2 <dbl>, AoA_Est_Word1 <dbl>, target <dbl>,
## #   distal <dbl>, hard <dbl>, easy <dbl>

Look at specific items that are off for 3afc (hard chosen less often than easy distractor)

df.multiAFC.distractor.summary %>%
  ungroup() %>% 
  filter(numAFC == 3) %>% 
  select(-c(n, answerWord)) %>% 
  pivot_wider(names_from = wordPairing, values_from = perc) %>%
  filter((hard + 0.1)< easy)

## # A tibble: 0 × 9
## # ℹ 9 variables: targetWord <chr>, numAFC <dbl>, totalAttempts <int>,
## #   clip_cor <dbl>, AoA_Est_Word2 <dbl>, AoA_Est_Word1 <dbl>, target <dbl>,
## #   hard <dbl>, easy <dbl>

Look at specific items that are off for 2afc (hard chosen less often than easy distractor)

df.multiAFC.distractor.summary %>%
  ungroup() %>% 
  filter(numAFC == 2) %>% 
  select(-c(n, answerWord)) %>% 
  pivot_wider(names_from = wordPairing, values_from = perc) %>%
  arrange(target)

## # A tibble: 216 × 8
##    targetWord  numAFC totalAttempts clip_cor AoA_Est_Word2 AoA_Est_Word1 target
##    <chr>        <dbl>         <int>    <dbl>         <dbl>         <dbl>  <dbl>
##  1 candlestick      2           254        1          5.61          5.61  0.386
##  2 mulch            2           281        1          9.22          9.22  0.395
##  3 grate            2           284        1          8.9           8.9   0.486
##  4 bobsled          2           259        1          9.38          9.38  0.533
##  5 cheese           2           282        1          4.33          4.33  0.596
##  6 freezer          2           245        1          4.63          4.63  0.604
##  7 thermos          2           296        1          8.29          8.29  0.608
##  8 sauerkraut       2           261        1          9.26          9.26  0.636
##  9 tuxedo           2           261        1          8.26          8.26  0.640
## 10 corset           2           266        1         10.7          10.7   0.647
## # ℹ 206 more rows
## # ℹ 1 more variable: hard <dbl>

Correlate clip with response patterns within each age group/nafc

ggplot(data=df.multiAFC.distractor.summary.byAge, aes(x=clip_cor, y=perc)) +
  geom_point(alpha=.1) +
  geom_smooth(method='lm') +
  theme_few() +
  ylab('Percent chosen') +
  xlab('Similarity in CLIP space') +
  facet_grid(numAFC~age_group) +
  stat_cor(method = "pearson") +
  ylim(0,1)

adult_patterns <- df.multiAFC.distractor.summary.byAge %>%
  filter(age_group==25)

clip_correlation_by_age_by_afc <- df.multiAFC.distractor.summary.byAge %>%
  group_by(numAFC, age_group) %>%
  summarize(rvalue = cor(clip_cor, perc))

Just witin all afc because grouping is tricky but oddly not that high relative to clip correlations, interesting… maybe just less precise per item given data sparisty

adult_correlation_by_age <- df.multiAFC.distractor.summary.byAge %>%
  group_by(age_group) %>%
  summarize(r_adults = cor(perc, adult_patterns$perc))

ggplot(data = clip_correlation_by_age_by_afc, aes(x=age_group, y=rvalue, col=numAFC)) +
  geom_point() +
  geom_smooth(aes(group=numAFC), span=1, alpha=.2) +
  theme_few() +
  ylab('CLIP-Behavior correlation') +
  xlab('Age in years')

summary(lm(data = clip_correlation_by_age_by_afc, rvalue ~ age_group + numAFC))

## 
## Call:
## lm(formula = rvalue ~ age_group + numAFC, data = clip_correlation_by_age_by_afc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25334 -0.08417  0.05009  0.09532  0.11780 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.524928   0.092007   5.705 4.62e-06 ***
## age_group    0.019753   0.003797   5.203 1.77e-05 ***
## numAFC      -0.009223   0.027573  -0.335    0.741    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1233 on 27 degrees of freedom
## Multiple R-squared:  0.5017, Adjusted R-squared:  0.4648 
## F-statistic: 13.59 on 2 and 27 DF,  p-value: 8.251e-05

Inferential stats

Look for interaction in predicting what items children choose as a function of CLIP simialrity

Add numafc as a covariate and random effects for the clip | target_word

summary(lmer(data = df.multiAFC.distractor.summary.byAge, perc ~ clip_cor*age_group + numAFC + (clip_cor|targetWord) + (1|totalAttempts)))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: perc ~ clip_cor * age_group + numAFC + (clip_cor | targetWord) +  
##     (1 | totalAttempts)
##    Data: df.multiAFC.distractor.summary.byAge
## 
## REML criterion at convergence: -2697.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6979 -0.7537  0.0265  0.7530  4.5649 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev. Corr 
##  targetWord    (Intercept) 0.679099 0.82407       
##                clip_cor    0.827127 0.90946  -1.00
##  totalAttempts (Intercept) 0.004081 0.06388       
##  Residual                  0.036562 0.19121       
## Number of obs: 7218, groups:  targetWord, 108; totalAttempts, 86
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)        -6.206e-01  8.763e-02  1.406e+02  -7.082 6.24e-11 ***
## clip_cor            1.177e+00  9.522e-02  1.327e+02  12.365  < 2e-16 ***
## age_group          -1.040e-01  3.101e-03  6.998e+03 -33.528  < 2e-16 ***
## numAFC             -5.117e-03  2.994e-03  6.989e+03  -1.709   0.0875 .  
## clip_cor:age_group  1.183e-01  3.406e-03  6.962e+03  34.737  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) clp_cr ag_grp numAFC
## clip_cor    -0.986                     
## age_group   -0.318  0.312              
## numAFC      -0.169  0.065  0.009       
## clp_cr:g_gr  0.313 -0.321 -0.982 -0.008

Add in estimated aoa of word1/2 as covariates, same results

summary(lmer(data = df.multiAFC.distractor.summary.byAge, perc ~ clip_cor*age_group + numAFC + AoA_Est_Word2 + AoA_Est_Word1 + (clip_cor|targetWord) + (1|totalAttempts)))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## perc ~ clip_cor * age_group + numAFC + AoA_Est_Word2 + AoA_Est_Word1 +  
##     (clip_cor | targetWord) + (1 | totalAttempts)
##    Data: df.multiAFC.distractor.summary.byAge
## 
## REML criterion at convergence: -2983
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8656 -0.7194  0.0291  0.7229  4.4815 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev. Corr 
##  targetWord    (Intercept) 0.808801 0.89933       
##                clip_cor    0.994525 0.99726  -1.00
##  totalAttempts (Intercept) 0.003988 0.06315       
##  Residual                  0.034852 0.18669       
## Number of obs: 7218, groups:  targetWord, 108; totalAttempts, 86
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)        -6.693e-01  9.572e-02  1.401e+02  -6.992 1.01e-10 ***
## clip_cor            1.178e+00  1.027e-01  1.255e+02  11.462  < 2e-16 ***
## age_group          -1.035e-01  3.028e-03  6.990e+03 -34.187  < 2e-16 ***
## numAFC             -9.288e-03  2.933e-03  6.985e+03  -3.166  0.00155 ** 
## AoA_Est_Word2       4.401e-02  2.484e-03  6.025e+03  17.720  < 2e-16 ***
## AoA_Est_Word1      -3.431e-02  3.200e-03  3.107e+02 -10.721  < 2e-16 ***
## clip_cor:age_group  1.178e-01  3.326e-03  6.954e+03  35.430  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) clp_cr ag_grp numAFC AA_E_W2 AA_E_W1
## clip_cor    -0.973                                     
## age_group   -0.285  0.282                              
## numAFC      -0.153  0.059  0.008                       
## AA_Est_Wrd2 -0.009 -0.003  0.009 -0.082                
## AA_Est_Wrd1 -0.137  0.015 -0.004  0.066 -0.674         
## clp_cr:g_gr  0.280 -0.291 -0.982 -0.008 -0.009   0.002

VisualVocab - CDS abstract, distractor analysis

Bria Long, Anya Ma

2023-09-20