library(tidyverse)
library(lubridate)
library(ggthemes)
library(assertthat)
library(langcog) # for CIs
library(mirt)
library(ggpubr)
library(knitr)
library(dplyr)
library(here)
library(lme4)
library(lmerTest)
test_corpus <- read_csv(here::here("item_generation/4_create_multi_AFC_stimuli/new_test.csv")) %>%
filter(!Word1 %in% c('honey','scrabble')) # manually excluded
test_clip_cor <- read_csv(here::here("item_generation/4_create_multi_AFC_stimuli/exp1_all_trials2023-04-11.csv")) %>%
select(Word1, Word2, trial_type, cor, AoA_Est_Word2, AoA_Est_Word1) %>%
rename(wordPairing = trial_type) %>%
rename(clip_cor = cor) %>%
filter(Word1 %in% unique(test_corpus$Word1)) %>%
filter(!Word1 %in% c('honey','scrabble')) # manually excluded
Create data structure and merge that has clip cor =1 for target selection
test_clip_cor_identity <- test_clip_cor %>%
select(Word1, AoA_Est_Word1) %>%
distinct(Word1, AoA_Est_Word1) %>%
mutate(Word2 = Word1) %>%
mutate(AoA_Est_Word2 = AoA_Est_Word1) %>%
mutate(clip_cor = 1) %>%
mutate(wordPairing = 'target')
test_clip_cor <- test_clip_cor %>%
full_join(test_clip_cor_identity) %>%
arrange(Word1)
clip_to_join <- test_clip_cor %>%
rename(targetWord = Word1) %>%
rename(answerWord = Word2)
test_corpus <- test_corpus %>%
left_join(test_clip_cor)
sum(is.na(test_corpus$clip_cor))
## [1] 0
bing = read_csv(here::here('data/validation_data/preprocessed_data/all_bing_data_for_item_info.csv')) %>%
mutate(age_group = as.numeric(floor(age_in_months/12))) %>%
select(-block)
multi-afc adults
adults <- read_csv(here::here("data/data_multiafc/multi-afc-april28.csv")) %>%
mutate(age_group = as.numeric(25)) %>%
select(-block)
# trial_data <- read_csv(here::here("data/data_multiafc/prod_roar-prod_tasks_vocab_raw_trials_081623.csv")) %>%
trial_data <- read_csv(here::here("data/rocketship_data/multi-afc-with-meta.csv")) %>%
select(-block) %>%
mutate(age_group = as.numeric(age_group)) %>%
filter(age_group<12) %>% # older kids just don't have enough trials
full_join(bing) %>% # 3-5 year olds
full_join(adults) %>%
filter(studyId %in% c("school-multiAFC", "prolific-multiAFC", "validation-multiAFC"), completed == TRUE, task =="test_response") %>%
mutate(schoolId = ifelse(studyId == "school-multiAFC", "school", schoolId)) %>%
mutate(options_clean = str_replace_all(options, "'", "\"")) # necessary for json parsing
Create functions for parsing options from nested json
parse_json_column <- function(json_col) {
map(json_col, ~as.data.frame(t(unlist(jsonlite::fromJSON(.x, flatten = TRUE)))))
}
parsed_list <- parse_json_column(trial_data$options_clean)
# Combine the list of data frames into a single data frame
parsed_df <- bind_rows(parsed_list)
# Combine the parsed columns with the original data frame
final_df <- cbind(trial_data %>% select(-options), parsed_df)
df.multiAFC.trials <- final_df %>%
pivot_longer(cols = c("0", "1", "2", "3")) %>%
filter(answerWord == value) %>%
select(pid, runId, schoolId, answerWord, targetWord, numAFC, value, correct, rt, age_group) %>%
left_join(test_corpus %>%
rename(targetWord = "Word1", answerWord = "Word2")) %>%
filter(is.na(itemGroup) | (itemGroup == "test")) %>%
mutate(wordPairing = ifelse(is.na(wordPairing), "target", wordPairing)) %>%
filter(!targetWord %in% c("ant","ball","bear","scrabble","honey")) %>%
select(pid, runId, schoolId, answerWord, targetWord, numAFC, value, correct, rt, wordPairing, age_group, clip_cor)
Create age/group and number of participants for filtering for plots / age-based analyses
age_group_pid <- df.multiAFC.trials %>%
group_by(age_group) %>%
summarize(num_pids = length(unique(pid)))
We have 1446 children aged 3-11 years, and 205 adults.
Need to write code to fill out response pattern for all possible target/distractor pairings…for use later.
full_item_structure_4AFC <- clip_to_join %>%
mutate(numAFC = 4)
full_item_structure_3AFC <- clip_to_join %>%
filter(wordPairing != 'distal') %>%
mutate(numAFC = 3)
full_item_structure_2AFC <- clip_to_join %>%
filter(!wordPairing %in% c('distal','easy')) %>%
mutate(numAFC = 2)
full_item_structure <- full_item_structure_4AFC %>%
full_join(full_item_structure_3AFC) %>%
full_join(full_item_structure_2AFC)
Now fill this out by all ages in the dataset, oof
full_item_structure_by_age = map_df(age_group_pid$age_group, ~full_item_structure %>% mutate(age_group = .x))
First do this across all age groups
df.multiAFC.totalAttempts <- df.multiAFC.trials %>%
group_by(targetWord, numAFC) %>%
tally() %>%
dplyr::rename(totalAttempts = n)
df.multiAFC.distractor.summary <- df.multiAFC.trials %>%
group_by(targetWord, answerWord, numAFC, wordPairing) %>%
tally() %>%
left_join(df.multiAFC.totalAttempts) %>%
mutate(perc = n/totalAttempts) %>%
full_join(full_item_structure) %>% # need to fill out item structure (but not by age)
mutate(perc = replace_na(perc, replace=0))
hist(df.multiAFC.totalAttempts$totalAttempts)
target_pc <- df.multiAFC.distractor.summary %>%
filter(wordPairing=='target')
hist(target_pc$perc)
Look at low/high accuracy items across all ages
low_acc_items <- target_pc %>%
filter(perc<.5) %>%
kable()
high_acc_items <- target_pc %>%
filter(perc>.95) %>%
kable()
Construct same dataframe but now by age group
df.multiAFC.totalAttempts.byAge <- df.multiAFC.trials %>%
group_by(targetWord, numAFC, age_group) %>%
tally() %>%
dplyr::rename(totalAttempts = n)
df.multiAFC.distractor.summary.byAge <- df.multiAFC.trials %>%
group_by(targetWord, answerWord, numAFC, wordPairing, age_group) %>%
tally() %>%
left_join(df.multiAFC.totalAttempts.byAge) %>%
mutate(perc = n/totalAttempts) %>%
full_join(full_item_structure_by_age)
couple of sanity checks for missing values that shuold be zero
assert_that(sum(is.na(df.multiAFC.distractor.summary.byAge$age_group))==0)
## [1] TRUE
assert_that(sum(is.na(df.multiAFC.distractor.summary.byAge$clip_cor))==0)
## [1] TRUE
There are some missing observations because these items were never chosen on a given trial/age group – need to fill those out
But we also have some target/distractor/age group pairings that are VERY sparsely populated –
hist(df.multiAFC.distractor.summary.byAge$totalAttempts)
For now, create “0” value in the percent chosen category
sum(is.na(df.multiAFC.distractor.summary.byAge$perc))
## [1] 2502
df.multiAFC.distractor.summary.byAge <- df.multiAFC.distractor.summary.byAge %>%
mutate(perc = replace_na(perc, replace=0))
Calcualte summary by distractor type and numAFC by age, group, use confidence intervals here
df.multiAFC.distractor.summary.perc <- df.multiAFC.distractor.summary.byAge %>%
ungroup() %>%
mutate(wordPairing = factor(wordPairing, levels = c('target','hard','easy','distal'))) %>% # just reordering
group_by(age_group, numAFC, wordPairing) %>%
multi_boot_standard(col = 'perc') %>% # compute CIs
mutate(number_afc = as.factor(paste0(numAFC,' AFC'))) #cosmetic for plots
First visualize within each age group – can plots nAFCs by different colors
Looks like gradient is getting sharper with age, as expected
ggplot(data = df.multiAFC.distractor.summary.perc, aes(x=wordPairing, y=mean, col=number_afc)) +
geom_point() +
geom_linerange(aes(y=mean, ymax = ci_upper, ymin = ci_lower)) +
geom_line(aes(group=number_afc)) +
facet_wrap(~number_afc, nrow=3) +
theme(aspect.ratio=.75) +
theme_few() +
ylab('Proportion images chosen') +
facet_wrap(~age_group)
Now plot within afc, with age groups as colors
ggplot(data = df.multiAFC.distractor.summary.perc %>% filter(age_group<25), aes(x=wordPairing, y=mean, col=age_group)) +
geom_point(alpha=.8) +
geom_point(data = df.multiAFC.distractor.summary.perc %>% filter(age_group==25), color='grey', alpha=.8) +
geom_linerange(aes(y=mean, ymax = ci_upper, ymin = ci_lower), alpha=.3) +
geom_linerange(data = df.multiAFC.distractor.summary.perc %>% filter(age_group==25), aes(y=mean, ymax = ci_upper, ymin = ci_lower), alpha=.3, color='grey') +
geom_line(data = df.multiAFC.distractor.summary.perc %>% filter(age_group==25), color='grey',aes(group=age_group)) +
geom_line(aes(group=age_group)) +
facet_wrap(~number_afc) +
theme(aspect.ratio=.75) +
scale_color_viridis_c(name='Age (in years)') +
xlab('') +
ylab('Proportion chosen') +
theme_few() +
ylim(0,1) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# ggtitle('Proportion images chosen by # of distractors')
ggplot(data = df.multiAFC.distractor.summary.perc, aes(x=age_group, y=mean, col=wordPairing)) +
geom_point() +
geom_linerange(aes(y=mean, ymax = ci_upper, ymin = ci_lower), alpha=.2) +
geom_smooth(aes(group=wordPairing), span=10) +
facet_wrap(~number_afc) +
theme(aspect.ratio=.75) +
# scale_color_viridis_c(name='Age (in years)') +
xlab('') +
ylab('Proportion chosen') +
theme_few() +
ylim(0,1) +
ggtitle('Proportion images chosen by # of distractors')
df.multiAFC.distractor.summary.word <- df.multiAFC.distractor.summary %>%
group_by(numAFC, wordPairing, targetWord) %>%
summarise(percentile = mean(perc)) %>%
arrange(targetWord, numAFC) %>%
ungroup() %>%
mutate(targetWord = fct_reorder(targetWord, percentile))
We have a big range of item effects here – some near ceiling, some near floor, and we span the whole range
ggplot(data = df.multiAFC.distractor.summary.word, aes(x=targetWord, y=percentile, color=wordPairing)) +
geom_point() +
theme_few() +
theme(axis.text.x = element_text(size = 4, angle = 90, hjust = 1)) +
facet_wrap(~numAFC)
Plotting individual item effects for the younger age groups is tricky…just not enough data from the youngest/oldest kids, who I’ve exlcuded here
df.multiAFC.distractor.summary.byAge <- df.multiAFC.distractor.summary.byAge %>%
group_by(targetWord) %>%
mutate(avg_perc = mean(perc)) %>%
ungroup() %>%
mutate(targetWord = fct_reorder(targetWord, avg_perc))
ggplot(data = df.multiAFC.distractor.summary.byAge %>% filter(age_group>5 & age_group<12), aes(x=targetWord, y=perc, color=wordPairing)) +
geom_point() +
theme_few() +
theme(axis.text.x = element_text(size = 4, angle = 90, hjust = 1)) +
facet_grid(numAFC~age_group)
Plot which items do vs. don’t follow the gradient
df.flag.pair <- df.multiAFC.distractor.summary.word %>%
pivot_wider(names_from = wordPairing, values_from = percentile) %>%
filter((hard > target) | (easy > hard) | (easy > target)) %>%
add_column(flag = 1) %>%
select(numAFC, targetWord, flag)
df.multiAFC.distractor.summary.word.updated <- df.multiAFC.distractor.summary.word %>%
left_join(df.flag.pair) %>%
mutate(flag = ifelse(is.na(flag), "expected", "flag"))
hardest_items <- df.multiAFC.distractor.summary.word %>%
group_by(targetWord) %>%
filter(wordPairing=='target') %>%
filter(numAFC==4) %>%
arrange(percentile) %>%
ungroup() %>%
slice_max(order_by=-percentile, n=20)
hardest_items_wide <- df.multiAFC.distractor.summary.word %>%
filter(numAFC==4) %>%
filter(targetWord %in% hardest_items$targetWord) %>%
arrange(targetWord)
# pivot_wider(values_from = percentile, names_from = answerWord)
ggplot(data=df.multiAFC.distractor.summary.word.updated , aes(x=wordPairing, y=percentile)) +
geom_point(size = 1, alpha = 0.6) +
geom_line(aes(group = targetWord, color = flag), alpha=.4) +
facet_wrap(~numAFC) +
scale_color_manual(values=c( "#8F993E", "#E05A1D")) +
labs(x = "item distractor type",
y = "percentage of responses")
Look at these individual items (n here is the number of items within afc that were flagged for being off)
df.multiAFC.distractor.summary.word.updated %>% filter(flag == "flag") %>%
group_by(targetWord) %>%
tally() %>%
arrange(-n)
## # A tibble: 30 × 2
## targetWord n
## <fct> <int>
## 1 candlestick 9
## 2 sunflower 7
## 3 hedgehog 7
## 4 map 7
## 5 bamboo 7
## 6 sloth 7
## 7 pistachio 7
## 8 omelet 7
## 9 kimono 7
## 10 cymbal 7
## # ℹ 20 more rows
Make wide data structure for 4afc only
df.multiAFC.distractor.summary.wide <- df.multiAFC.distractor.summary %>%
ungroup() %>%
filter(numAFC == 4) %>%
select(-c(n, answerWord)) %>%
pivot_wider(names_from = wordPairing, values_from = perc)
Look at specific items that are off (hard chosen less often than easy distractor)
df.multiAFC.distractor.summary.wide %>%
filter((hard + 0.1)< easy)
## # A tibble: 0 × 10
## # ℹ 10 variables: targetWord <chr>, numAFC <dbl>, totalAttempts <int>,
## # clip_cor <dbl>, AoA_Est_Word2 <dbl>, AoA_Est_Word1 <dbl>, target <dbl>,
## # distal <dbl>, hard <dbl>, easy <dbl>
Look at specific items that are off for 3afc (hard chosen less often than easy distractor)
df.multiAFC.distractor.summary %>%
ungroup() %>%
filter(numAFC == 3) %>%
select(-c(n, answerWord)) %>%
pivot_wider(names_from = wordPairing, values_from = perc) %>%
filter((hard + 0.1)< easy)
## # A tibble: 0 × 9
## # ℹ 9 variables: targetWord <chr>, numAFC <dbl>, totalAttempts <int>,
## # clip_cor <dbl>, AoA_Est_Word2 <dbl>, AoA_Est_Word1 <dbl>, target <dbl>,
## # hard <dbl>, easy <dbl>
Look at specific items that are off for 2afc (hard chosen less often than easy distractor)
df.multiAFC.distractor.summary %>%
ungroup() %>%
filter(numAFC == 2) %>%
select(-c(n, answerWord)) %>%
pivot_wider(names_from = wordPairing, values_from = perc) %>%
arrange(target)
## # A tibble: 216 × 8
## targetWord numAFC totalAttempts clip_cor AoA_Est_Word2 AoA_Est_Word1 target
## <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 candlestick 2 254 1 5.61 5.61 0.386
## 2 mulch 2 281 1 9.22 9.22 0.395
## 3 grate 2 284 1 8.9 8.9 0.486
## 4 bobsled 2 259 1 9.38 9.38 0.533
## 5 cheese 2 282 1 4.33 4.33 0.596
## 6 freezer 2 245 1 4.63 4.63 0.604
## 7 thermos 2 296 1 8.29 8.29 0.608
## 8 sauerkraut 2 261 1 9.26 9.26 0.636
## 9 tuxedo 2 261 1 8.26 8.26 0.640
## 10 corset 2 266 1 10.7 10.7 0.647
## # ℹ 206 more rows
## # ℹ 1 more variable: hard <dbl>
ggplot(data=df.multiAFC.distractor.summary.byAge, aes(x=clip_cor, y=perc)) +
geom_point(alpha=.1) +
geom_smooth(method='lm') +
theme_few() +
ylab('Percent chosen') +
xlab('Similarity in CLIP space') +
facet_grid(numAFC~age_group) +
stat_cor(method = "pearson") +
ylim(0,1)
adult_patterns <- df.multiAFC.distractor.summary.byAge %>%
filter(age_group==25)
clip_correlation_by_age_by_afc <- df.multiAFC.distractor.summary.byAge %>%
group_by(numAFC, age_group) %>%
summarize(rvalue = cor(clip_cor, perc))
Just witin all afc because grouping is tricky but oddly not that high relative to clip correlations, interesting… maybe just less precise per item given data sparisty
adult_correlation_by_age <- df.multiAFC.distractor.summary.byAge %>%
group_by(age_group) %>%
summarize(r_adults = cor(perc, adult_patterns$perc))
ggplot(data = clip_correlation_by_age_by_afc, aes(x=age_group, y=rvalue, col=numAFC)) +
geom_point() +
geom_smooth(aes(group=numAFC), span=1, alpha=.2) +
theme_few() +
ylab('CLIP-Behavior correlation') +
xlab('Age in years')
summary(lm(data = clip_correlation_by_age_by_afc, rvalue ~ age_group + numAFC))
##
## Call:
## lm(formula = rvalue ~ age_group + numAFC, data = clip_correlation_by_age_by_afc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25334 -0.08417 0.05009 0.09532 0.11780
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.524928 0.092007 5.705 4.62e-06 ***
## age_group 0.019753 0.003797 5.203 1.77e-05 ***
## numAFC -0.009223 0.027573 -0.335 0.741
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1233 on 27 degrees of freedom
## Multiple R-squared: 0.5017, Adjusted R-squared: 0.4648
## F-statistic: 13.59 on 2 and 27 DF, p-value: 8.251e-05
Look for interaction in predicting what items children choose as a function of CLIP simialrity
Add numafc as a covariate and random effects for the clip | target_word
summary(lmer(data = df.multiAFC.distractor.summary.byAge, perc ~ clip_cor*age_group + numAFC + (clip_cor|targetWord) + (1|totalAttempts)))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: perc ~ clip_cor * age_group + numAFC + (clip_cor | targetWord) +
## (1 | totalAttempts)
## Data: df.multiAFC.distractor.summary.byAge
##
## REML criterion at convergence: -2697.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6979 -0.7537 0.0265 0.7530 4.5649
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## targetWord (Intercept) 0.679099 0.82407
## clip_cor 0.827127 0.90946 -1.00
## totalAttempts (Intercept) 0.004081 0.06388
## Residual 0.036562 0.19121
## Number of obs: 7218, groups: targetWord, 108; totalAttempts, 86
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -6.206e-01 8.763e-02 1.406e+02 -7.082 6.24e-11 ***
## clip_cor 1.177e+00 9.522e-02 1.327e+02 12.365 < 2e-16 ***
## age_group -1.040e-01 3.101e-03 6.998e+03 -33.528 < 2e-16 ***
## numAFC -5.117e-03 2.994e-03 6.989e+03 -1.709 0.0875 .
## clip_cor:age_group 1.183e-01 3.406e-03 6.962e+03 34.737 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) clp_cr ag_grp numAFC
## clip_cor -0.986
## age_group -0.318 0.312
## numAFC -0.169 0.065 0.009
## clp_cr:g_gr 0.313 -0.321 -0.982 -0.008
Add in estimated aoa of word1/2 as covariates, same results
summary(lmer(data = df.multiAFC.distractor.summary.byAge, perc ~ clip_cor*age_group + numAFC + AoA_Est_Word2 + AoA_Est_Word1 + (clip_cor|targetWord) + (1|totalAttempts)))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## perc ~ clip_cor * age_group + numAFC + AoA_Est_Word2 + AoA_Est_Word1 +
## (clip_cor | targetWord) + (1 | totalAttempts)
## Data: df.multiAFC.distractor.summary.byAge
##
## REML criterion at convergence: -2983
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8656 -0.7194 0.0291 0.7229 4.4815
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## targetWord (Intercept) 0.808801 0.89933
## clip_cor 0.994525 0.99726 -1.00
## totalAttempts (Intercept) 0.003988 0.06315
## Residual 0.034852 0.18669
## Number of obs: 7218, groups: targetWord, 108; totalAttempts, 86
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -6.693e-01 9.572e-02 1.401e+02 -6.992 1.01e-10 ***
## clip_cor 1.178e+00 1.027e-01 1.255e+02 11.462 < 2e-16 ***
## age_group -1.035e-01 3.028e-03 6.990e+03 -34.187 < 2e-16 ***
## numAFC -9.288e-03 2.933e-03 6.985e+03 -3.166 0.00155 **
## AoA_Est_Word2 4.401e-02 2.484e-03 6.025e+03 17.720 < 2e-16 ***
## AoA_Est_Word1 -3.431e-02 3.200e-03 3.107e+02 -10.721 < 2e-16 ***
## clip_cor:age_group 1.178e-01 3.326e-03 6.954e+03 35.430 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) clp_cr ag_grp numAFC AA_E_W2 AA_E_W1
## clip_cor -0.973
## age_group -0.285 0.282
## numAFC -0.153 0.059 0.008
## AA_Est_Wrd2 -0.009 -0.003 0.009 -0.082
## AA_Est_Wrd1 -0.137 0.015 -0.004 0.066 -0.674
## clp_cr:g_gr 0.280 -0.291 -0.982 -0.008 -0.009 0.002