Setup

library(tidyverse)
library(lubridate)
library(ggthemes)
library(mirt)
library(knitr)
library(dplyr)
library(ggpubr)
library(assertthat)

Load data

Read in aoa and other meta

item_meta <- read_csv(file = here::here('data/preprocessed_data/exp1_all_trials2023-04-11.csv'))

word1_aoa <- item_meta %>%
  distinct(Word1, AoA_Est_Word1)  %>%
  rename(targetWord = Word1)

Load 2afc data, which has already been cleaned

school_data_2afc_cleaned <- read_csv(file = here::here('data/preprocessed_data/all_preprocessed_data.csv')) %>%
  mutate(word1 = str_replace(word1, ' ', '.')) %>%
  mutate(word2 = str_replace(word2, ' ', '.')) %>%
  mutate(item_pair = paste0(word1,'_',word2)) %>%
  mutate(numAFC = 2) %>%
  mutate(sub_id = pid, numAFC = as.character(numAFC)) %>%
  filter(cohort=='school') %>%
  mutate(administration = 'initial')

Load meta data for 2afc participants

school_2afc_meta <- read_csv(here::here('data/school_meta_data/cs01-metadata-age.csv')) %>%
  as_tibble() %>%
  select(pid, age) %>%
  mutate(age_numeric = age, age_group = age) %>% # don't have precise age 
  select(-age) %>%
  filter(pid %in% unique(school_data_2afc_cleaned$pid)) %>%
  mutate(age_numeric = as.numeric(age_numeric))

We have 1606 rocketship children who completed the 2afc version.

There were 393 items in this initial version, including CLIP and DAIVT items.

Multiafc version

school_data_multi_afc <- read_csv(here::here('data/rocketship_data/multi-afc-with-meta.csv')) 

Get rid of participants who missed more than 1 catch trial (out of three)

catch_low <- school_data_multi_afc %>%
  group_by(pid, itemType) %>%
  summarize(pc = mean(correct), num_catch = n()) %>%
  filter(itemType=='catch') %>%
  filter(pc <.5)
school_data_multi_afc_cleaned <- school_data_multi_afc %>%
  filter(!pid %in% catch_low$pid) %>% # exclude low acc kids
  filter(!is.na(as.numeric(age_group))) %>% # exclude adults
  filter(itemType=='test') %>% # only test trials
  filter(completed==TRUE) %>%
  mutate(cohort='school')
  
adults_multiafc_cleaned <- school_data_multi_afc %>%
  filter(!pid %in% catch_low$pid) %>% # exclude low acc kids
  filter(!pid %in% c('wm-try')) %>% # test pid
  filter(age_group == 'Adult') %>% # include adults
  filter(itemType=='test') %>% # only test trials
  filter(completed==TRUE) 

We have 1428 rocketship who completed the multiafc version.

Load bing kids on multiafc

Load and preprocess bing demographics

bing_demo <- read_csv(here::here('data/validation_data/demographics.csv')) %>%
    as_tibble %>%
    select(SubID, `Age (months)`, Exclude, Date) %>%
    rename(age_in_months = `Age (months)`) %>%
    rename(pid = SubID) %>%
    mutate(age_numeric = age_in_months/12) %>%
    mutate(age_group = as.numeric(floor(age_numeric))) %>%
    select(pid, age_group, age_numeric)
bing_multiafc<- read_csv(here::here('data/validation_data/preprocessed_data/all_bing_data_for_item_info.csv'))  %>%
  mutate(cohort = 'bing') %>%
  filter(itemType == 'test') %>%
  select(-block) 

all_kids_multiafc <- school_data_multi_afc_cleaned %>%
  full_join(bing_multiafc)

We have 65 bing kids who completed the multiafc version.

We have 1493 rocketship + bing kids who completed the multiafc version.

Filter out the 2afc data so that we’re only including the CLIP items.

school_data_2afc_cleaned_clip_only <- school_data_2afc_cleaned %>%
  filter(word1 %in% unique(school_data_multi_afc$targetWord))

There were 393 items in this initial version, including CLIP and DAIVT items.

There were 218 items in this initial version, including only CLIP items.

Join together pids from both administrations

completed_twice_pids <- school_data_2afc_cleaned_clip_only %>%
  filter(pid %in% unique(school_data_multi_afc_cleaned$pid) ) %>%
  distinct(pid)

There are 871 kids who completed both versions.

Compute correct by pid

correct_by_pid <- school_data_2afc_cleaned %>%
  group_by(pid) %>%
  summarize(prop_correct_2afc = mean(correct))  

Meta data for joining back

pid_by_age <- school_data_multi_afc_cleaned %>%
  mutate(age_numeric = as.numeric(age_numeric), age_group = as.numeric(age_group)) %>%
  distinct(pid, age_group, age_numeric) %>%
  full_join(school_2afc_meta) %>%
  full_join(bing_demo)

Compute data structure with accuracy overall for repeat kids (don’t get bing kids here)

correct_by_pid_by_multi_afc <- school_data_multi_afc_cleaned %>%
  mutate(age_group = as.numeric(age_group)) %>%
  group_by(pid, age_group) %>%
  summarize(prop_correct_overall = mean(correct)) 

completed_twice_overall <- correct_by_pid %>% # 2afc initial
  left_join(correct_by_pid_by_multi_afc, by=c('pid')) %>% # join overall multiafc
  filter(!is.na(prop_correct_overall))

Make data structure now with multiafc

correct_by_pid_by_afc <- school_data_multi_afc_cleaned %>%
  group_by(pid, numAFC, age_group) %>%
  summarize(prop_correct_nafc = mean(correct)) 

Make it wider to join

correct_by_pid_by_afc_wide <- correct_by_pid_by_afc %>%
  rowwise() %>%
  mutate(numAFC_string = paste0('num',numAFC)) %>%
  select(-numAFC) %>%
  pivot_wider(names_from = numAFC_string, values_from = 'prop_correct_nafc') 

Now join back with 2afc data, only get kids in both

completed_twice <- correct_by_pid %>%
  left_join(correct_by_pid_by_afc) %>%
  filter(!is.na(prop_correct_nafc))

Make a long version

completed_twice_long <- correct_by_pid %>%
  left_join(correct_by_pid_by_afc_wide) %>%
  filter(!is.na(num2)) 

Make plots of overall accuracy correlations

ggplot(data=completed_twice_overall %>% mutate(age_group = as.numeric(age_group)), aes(x=prop_correct_2afc, y=prop_correct_overall, col=age_group)) +
  geom_point(alpha=.7) +
  geom_smooth() +
  theme_few(base_size=12) +
  ggtitle('Initial (2afc) vs. second (multiafc) visual vocab admin') +
  ggpubr::stat_cor(size=3) +
  xlab('Initial 2afc accuracy') +
  ylab('Second multiaf accuracy (overall)') +
  facet_wrap(~age_group)

Same when we look at pc correlations overall, doesn’t seem to be about some kids always being at ceiling – there’s a range

ggplot(data=completed_twice_overall %>% mutate(age_group = as.numeric(age_group)), aes(x=prop_correct_2afc, y=prop_correct_overall, col=age_group)) +
  geom_point(alpha=.7) +
  geom_smooth() +
  theme_few(base_size=12) +
  ggtitle('Initial (2afc) vs. second (multiafc) visual vocab admin') +
  ggpubr::stat_cor(size=3) +
  xlab('Initial 2afc accuracy') +
  ylab('Second multiafc accuracy (overall)') 

Correlations between first and second administration aren’t that high, and it doesn’t matter which AFC trials we look at

ggplot(data=completed_twice, aes(x=prop_correct_2afc, y=prop_correct_nafc, col=as.numeric(age_group))) +
  geom_point(alpha=.6) +
  geom_smooth() +
  theme_few(base_size=16) +
  ggtitle('Initial vs. second adminstration by AFC') +
  facet_wrap(~numAFC) +
  xlab('Initial 2afc acc') +
  ylab('Second multiafc acc') +
  stat_cor() +
  theme(legend.position='none')

Estimate thetas for each kid and correlate

OK, so maybe these correlations are low because there is variability in the items, and we want to see what it’s likke when we estimate thetas for each kids.

To do so, let’s go ahead and use ALL of the data to fit a model predicting thetas for all kids together

First read back in prior trial level data and munge it

Use preprocessed data from other children/adults who have taken any version to estimate thetas for each child.

We need “item_ids” to be consistent across the datasets, so we have so munging to do

First get meta info on distractors for 2afc multi-afc hard trials so we can merge across the same trial tyope from both experiments

multiafc_distractors = read_csv(here::here('data/preprocessed_data/exp1_all_stimuli_april_2023.csv'))  %>%
  mutate(targetWord = Word1) %>%
  filter(trial_type == 'hard')  %>%
  select(targetWord, Word2)

Load all 2afc data in prolific/school cohort and make word1_word2 trialId

all_2afc_data <- read_csv(file = here::here('data/preprocessed_data/all_preprocessed_data.csv')) %>%
  filter(task=='test_response') %>% # only test trials should be true
  filter(completed==TRUE) %>% # completed the assessment
  filter(cohort %in% c('school','prolific')) %>%
  filter(word1 %in% multiafc_distractors$targetWord) %>% # no daivt distractors
  mutate(itemId = paste(word1, word2, sep = "_")) %>%
  mutate(numAFC = 2) %>%
  rename(targetWord = word1) %>%
  mutate(administration = '2afc') %>%
  select(pid, targetWord, itemId, correct, numAFC, administration, cohort)

All multiafc kid data - not including adults right now as they just make the ceiling problem worse.

Now munge data for identical 2afc trials from the multiafc experiment to have the same itemID format (word1_word2)

all_multiafc_data_2afc <- all_kids_multiafc %>%
  filter(itemType=='test') %>%
  filter(numAFC==2) %>%
  right_join(multiafc_distractors) %>%
  mutate(itemId = paste(targetWord, Word2, sep = "_"))  %>%
  mutate(administration = 'multiafc') %>%
  select(pid, targetWord, itemId, correct, numAFC, administration, cohort)

Make format for multiafc (word1_numafc) for other trials and then merge

kid_data_multi_afc_munged <- all_kids_multiafc %>%
  mutate(administration = 'multiafc') %>%
  filter(numAFC > 2) %>%
  mutate(itemId = paste(targetWord, numAFC, sep = "_")) %>%
  select(pid, targetWord, itemId, correct, numAFC, administration, cohort) %>%
  full_join(all_multiafc_data_2afc)

Merge multiafc and 2afc trial level data with same format

all_data <- kid_data_multi_afc_munged %>%
  full_join(all_2afc_data)

Sanity check numbers and participants

all_data %>%
  group_by(administration, cohort) %>%
  summarize(num_participants = length(unique(pid))) %>%
  kable()
administration cohort num_participants
2afc school 1580
multiafc bing 65
multiafc school 1428

Sanity check how much data we have by items

data_by_item <- all_data %>%
  group_by(itemId) %>%
  summarize(num_responses = length(unique(pid)))

Should have no items with less than 100 responses

what <- data_by_item %>% filter(num_responses<100)

check <- all_data %>%
  filter(itemId %in% what$itemId)

Once bunch of items that were in both versions, other clusters from the multiafc versions only

hist(data_by_item$num_responses)

Get this itemID list for merging after running the models

Eliminate items, people that are nearly at ceiling in this sample

ceiling_people<- all_data %>%
  select(pid, correct) %>%
  group_by(pid) %>%
  summarize(prop_correct = mean(correct), num_responses = n()) %>%
  filter(prop_correct>.98)

This doesn’t eliminate any data right now but lots of items are close to the 200 threshold, ugh

not_enough_data_by_item <- all_data %>%
  group_by(itemId) %>%
  summarize(trials_per_id = length(unique(pid))) %>%
  filter(trials_per_id<175)

Eliminate people that didn’t complete enough trials with these items (not sure if we should do this since we’re estimating thetas with other data?) It’s about 100 people.

not_enough_data_by_person <- all_data %>%
  filter(administration=='2afc') %>%
  filter(!itemId %in% not_enough_data_by_item$itemId) %>%
  group_by(pid) %>%
  summarize(trials_per_id = length(unique(itemId))) %>%
  filter(trials_per_id<20)
ceiling_items<- all_data %>%
  filter(!pid %in% ceiling_people$pid) %>%
  filter(!pid %in% not_enough_data_by_person$pid) %>%
  select(itemId, correct) %>%
  group_by(itemId) %>%
  summarize(prop_correct = mean(correct), num_responses = n()) %>%
  filter(prop_correct>.97)

cleaned data

cleaned_data <- all_data %>%
  filter(!itemId %in% ceiling_items$itemId) %>%
  filter(!itemId %in% not_enough_data_by_item$itemId) %>%
  filter(!pid %in% ceiling_people$pid) %>%
  filter(!pid %in% not_enough_data_by_person$pid)
itemIdforIRT <- cleaned_data %>%
  distinct(targetWord, numAFC, itemId)

We have FOUR trial types per word – 2AFC hard 2AFC easy 3AFC 4AFC

trials_per_word <- itemIdforIRT %>%
  group_by(targetWord) %>%
  summarize(numAFC = length(unique(itemId)))

Make structure for IRT models

This structure aggregates across administrations for each participant, e.g., if they got a trial right on one take and wrong on another the value is .5

d_wide_all<- cleaned_data %>%
  select(itemId, correct, pid) %>%
  ungroup() %>%
  arrange(itemId) %>%
  ungroup() %>%
  pivot_wider(names_from=itemId, values_from=correct, values_fn = ~mean(.x)) %>%
  ungroup()

d_mat_all <- d_wide_all %>%
  select(-pid) %>%
  data.frame %>%
  data.matrix 

rownames(d_mat_all) <- d_wide_all$pid

Let’s just look at difficulties by item in this subset that we’re modeling

by_item <- cleaned_data %>%
  group_by(targetWord, numAFC) %>%
  summarize(prop_correct = mean(correct)) %>%
  left_join(word1_aoa) %>%
  ungroup() %>%
  mutate(targetWord = fct_reorder(targetWord, -prop_correct))

ggplot(data = by_item, aes(x=targetWord, y=prop_correct, col=AoA_Est_Word1)) +
  geom_point() +
  theme_few() +
  facet_wrap(~numAFC, nrow=3) +
  theme(axis.text.x = element_text(angle = 90, size=6, vjust = 0.5, hjust=1))

There’s a big range here. What about by age?

by_item <- cleaned_data %>%
  left_join(pid_by_age) %>%
  group_by(targetWord, numAFC, age_group) %>%
  summarize(prop_correct = mean(correct), num_participants =length(unique(pid))) %>%
  left_join(word1_aoa) %>%
  ungroup() %>%
  mutate(targetWord = fct_reorder(targetWord, -prop_correct))

Look at how proportion correct varies by target word, age, and numAFC

ggplot(data = by_item, aes(x=targetWord, y=prop_correct, col=numAFC, size=num_participants)) +
  geom_point(alpha=.8) +
  theme_few() +
  facet_grid(numAFC~age_group) +
  theme(axis.text.x = element_text(angle = 90, size=4, vjust = 0.5, hjust=1))

ggplot(data = by_item, aes(x=AoA_Est_Word1, y=prop_correct, col=numAFC, size=num_participants)) +
  geom_point(alpha=.8) +
  theme_few() +
  facet_grid(numAFC~age_group) +
  geom_smooth(span=20) +
  theme(axis.text.x = element_text(angle = 90, size=6, vjust = 0.5, hjust=1))

Reasonable amounts of variability across age

ggplot(data = by_item, aes(x=targetWord, y=prop_correct, col=numAFC, size=num_participants)) +
  geom_point(alpha=.8) +
  theme_few() +
  facet_grid(numAFC~age_group) +
  theme(axis.text.x = element_text(angle = 90, size=6, vjust = 0.5, hjust=1))

Construct a 2PL model where guess rate is specificed

Make a list for specifying guess rate in IRT instead of letting it be a paramters?

itemIdforIRTGuess <- itemIdforIRT %>%
  arrange(itemId) %>%
  mutate(guess_rate = case_when( numAFC==2 ~0.5 ,
                                 numAFC==3 ~0.33 ,
                                 numAFC==4 ~0.25 )) 

list.guessing = itemIdforIRTGuess$guess_rate

Run basic 2PL model with priors specified above and guessing rate per item

start.dim <- dim(d_mat_all)[2]
mm.string = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, 1, 0.5),
  PRIOR = (1-%d, d, norm, 0, 2), 
  LBOUND = (1-%d, a1, 0.0)' # why was this at .5?
  )
mm = mirt.model(sprintf(mm.string, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim))

Run a 2PL model

mod.vocab.2pl <- mirt(d_mat_all, model = mm, itemtype = '2PL', guess = list.guessing)
## 
Iteration: 1, Log-Lik: -51634.197, Max-Change: 5.38440
Iteration: 2, Log-Lik: -42961.571, Max-Change: 0.47868
Iteration: 3, Log-Lik: -42835.168, Max-Change: 0.19489
Iteration: 4, Log-Lik: -42782.723, Max-Change: 0.13694
Iteration: 5, Log-Lik: -42750.494, Max-Change: 0.10447
Iteration: 6, Log-Lik: -42730.012, Max-Change: 0.08535
Iteration: 7, Log-Lik: -42716.977, Max-Change: 0.06929
Iteration: 8, Log-Lik: -42708.700, Max-Change: 0.05589
Iteration: 9, Log-Lik: -42703.454, Max-Change: 0.04485
Iteration: 10, Log-Lik: -42700.134, Max-Change: 0.03587
Iteration: 11, Log-Lik: -42698.036, Max-Change: 0.02863
Iteration: 12, Log-Lik: -42696.710, Max-Change: 0.02281
Iteration: 13, Log-Lik: -42694.542, Max-Change: 0.00506
Iteration: 14, Log-Lik: -42694.506, Max-Change: 0.00376
Iteration: 15, Log-Lik: -42694.484, Max-Change: 0.00298
Iteration: 16, Log-Lik: -42694.447, Max-Change: 0.00059
Iteration: 17, Log-Lik: -42694.446, Max-Change: 0.00047
Iteration: 18, Log-Lik: -42694.446, Max-Change: 0.00036
Iteration: 19, Log-Lik: -42694.446, Max-Change: 0.00015
Iteration: 20, Log-Lik: -42694.446, Max-Change: 0.00010

Specify a 3PL model, allowing guess rate to vary

start.dim <- dim(d_mat_all)[2]
mm.string = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, 1, 0.5),
  PRIOR = (1-%d, d, norm, 0, 2), 
  LBOUND = (1-%d, g, .25),
  UBOUND = (1-%d, g, 1),
  LBOUND = (1-%d, a1, 0.0)' # why was this at .5?
  )
mm3pl = mirt.model(sprintf(mm.string, 
                        start.dim,
                        start.dim,
                        start.dim,
                        start.dim,
                        start.dim,
                        start.dim))
mod.vocab.3pl <- mirt(d_mat_all, model = mm3pl, itemtype = '3PL')
## 
Iteration: 1, Log-Lik: -53109.953, Max-Change: 3.92342
Iteration: 2, Log-Lik: -43634.487, Max-Change: 0.55905
Iteration: 3, Log-Lik: -43229.082, Max-Change: 0.47732
Iteration: 4, Log-Lik: -42968.010, Max-Change: 0.42427
Iteration: 5, Log-Lik: -42787.191, Max-Change: 0.40080
Iteration: 6, Log-Lik: -42664.554, Max-Change: 0.42137
Iteration: 7, Log-Lik: -42581.357, Max-Change: 0.36815
Iteration: 8, Log-Lik: -42527.257, Max-Change: 0.55529
Iteration: 9, Log-Lik: -42491.761, Max-Change: 0.39804
Iteration: 10, Log-Lik: -42469.541, Max-Change: 0.21810
Iteration: 11, Log-Lik: -42455.408, Max-Change: 0.08159
Iteration: 12, Log-Lik: -42446.480, Max-Change: 0.06901
Iteration: 13, Log-Lik: -42440.759, Max-Change: 0.07742
Iteration: 14, Log-Lik: -42437.086, Max-Change: 0.07718
Iteration: 15, Log-Lik: -42434.726, Max-Change: 0.11554
Iteration: 16, Log-Lik: -42430.732, Max-Change: 0.06554
Iteration: 17, Log-Lik: -42430.614, Max-Change: 0.08884
Iteration: 18, Log-Lik: -42430.578, Max-Change: 0.01894
Iteration: 19, Log-Lik: -42430.569, Max-Change: 0.01253
Iteration: 20, Log-Lik: -42430.554, Max-Change: 0.00532
Iteration: 21, Log-Lik: -42430.544, Max-Change: 0.00325
Iteration: 22, Log-Lik: -42430.531, Max-Change: 0.00089
Iteration: 23, Log-Lik: -42430.530, Max-Change: 0.00070
Iteration: 24, Log-Lik: -42430.529, Max-Change: 0.00057
Iteration: 25, Log-Lik: -42430.528, Max-Change: 0.00012
Iteration: 26, Log-Lik: -42430.528, Max-Change: 0.00016
Iteration: 27, Log-Lik: -42430.528, Max-Change: 0.00006

Tried 3PL models, 2PL had lower AIC/BIC,

anova(mod.vocab.2pl, mod.vocab.3pl)
##                    AIC    SABIC       HQ      BIC    logLik   logPost  df
## mod.vocab.2pl 84953.12 86945.75 86609.54 89481.09 -41678.56 -42694.44  NA
## mod.vocab.3pl 85326.07 88315.01 87810.70 92118.03 -41466.04 -42430.53 399

Get some coefficients for these models – choosing 2pl for now since lower AIC/BIC

co <- coef(mod.vocab.2pl,simplify=TRUE, IRTpars = FALSE) # Get coeeficients
coefs <- tibble::rownames_to_column(as.data.frame(co$items),'itemId')

Estimate reliabilty of this model

Marginal_rxx

mirt::marginal_rxx(mod.vocab.2pl)
## [1] 0.9693657

Empirical_rxx

fs_mod2pl <- fscores(mod.vocab.2pl, full.scores.SE=TRUE)
head(fs_mod2pl)
##                F      SE_F
## [1,] -0.53950006 0.4519553
## [2,] -0.65663442 0.4143779
## [3,] -0.06300122 0.4234623
## [4,]  0.23862979 0.4117100
## [5,] -1.25396162 0.3765784
## [6,]  0.54324603 0.4339549
empirical_rxx(fs_mod2pl)
##         F 
## 0.7772944
item_fits = itemfit(mod.vocab.2pl, fit_stats='infit')

It looks like we have really low infit statistics – I’m not sure what is going on here.

hist(item_fits$infit)

Outfits mostly look ok?

hist(item_fits$outfit)

plot(mod.vocab.2pl)

Trim off items with low slopes & rerunning 2pl

coefs_trimmed <- coefs %>%
  rename(slope = a1) %>%
  filter(slope>.8) %>%
  left_join(itemIdforIRT, by=c('itemId'))  %>%
  group_by(targetWord) %>%
  mutate(average_slope = mean(slope), both_items = length(unique(itemId))) 

We got rid of 62 of the items with lowest slopes.

trimmed_items <- coefs %>%
  rename(slope = a1) %>%
  filter(slope<=.8) %>%
  left_join(itemIdforIRT, by=c('itemId'))  %>%
  arrange(targetWord)

Looks like these items come from 38 unqique words, which is nice.

kable(trimmed_items %>% arrange(slope))
itemId slope d g u targetWord numAFC
gondola_trolley 0.0411556 -0.6843378 0.50 1 gondola 2
gondola_3 0.2026996 -0.3793319 0.33 1 gondola 3
candlestick_4 0.2721611 -1.4983416 0.25 1 candlestick 4
gondola_4 0.3541357 -0.1809290 0.25 1 gondola 4
freezer_4 0.4236752 -0.4222056 0.25 1 freezer 4
kimono_turntable 0.4264104 -0.2401230 0.50 1 kimono 2
taillight_3 0.4522444 0.4043826 0.33 1 taillight 3
fox_3 0.4728896 2.2000945 0.33 1 fox 3
freezer_3 0.4966922 -0.6925563 0.33 1 freezer 3
grate_3 0.4990694 -1.9368611 0.33 1 grate 3
sauerkraut_dashboard 0.5094169 0.8279084 0.50 1 sauerkraut 2
sorbet_tamale 0.5197377 0.3846426 0.50 1 sorbet 2
teapot_3 0.5210931 0.7190383 0.33 1 teapot 3
tuxedo_4 0.5270226 -0.9728908 0.25 1 tuxedo 4
cymbal_kaleidoscope 0.5284330 -1.0628817 0.50 1 cymbal 2
tuxedo_3 0.5289096 -1.5052972 0.33 1 tuxedo 3
lollipop_doorbell 0.5370490 2.7611910 0.50 1 lollipop 2
headdress_3 0.5430806 0.0463051 0.33 1 headdress 3
teapot_4 0.5492847 1.0315018 0.25 1 teapot 4
pajamas_pants 0.5585700 -0.6762484 0.50 1 pajamas 2
turtle_frog 0.5624805 1.5306907 0.50 1 turtle 2
taillight_bumper 0.5654226 -0.1817327 0.50 1 taillight 2
grate_4 0.5865926 -1.2388688 0.25 1 grate 4
spatula_paddle 0.5866268 0.1135574 0.50 1 spatula 2
pajamas_cage 0.5955741 2.0764512 0.50 1 pajamas 2
stretcher_thermostat 0.6028205 1.4714070 0.50 1 stretcher 2
koala_3 0.6058408 1.7826383 0.33 1 koala 3
mandolin_4 0.6295693 -0.1363828 0.25 1 mandolin 4
artichoke_sunroof 0.6348734 1.0944322 0.50 1 artichoke 2
oil_3 0.6390161 1.0605928 0.33 1 oil 3
stew_4 0.6551823 -0.3443436 0.25 1 stew 4
pump_coat 0.6661403 2.1142438 0.50 1 pump 2
headdress_poster 0.6719631 0.8980228 0.50 1 headdress 2
teabag_4 0.6744869 1.1388222 0.25 1 teabag 4
sauerkraut_4 0.6750641 -1.5842832 0.25 1 sauerkraut 4
fox_wolf 0.6756263 1.7413645 0.50 1 fox 2
horn_bone 0.6779682 0.2876980 0.50 1 horn 2
grate_crate 0.6861602 -4.3360946 0.50 1 grate 2
prune_raisin 0.6931293 -0.8239175 0.50 1 prune 2
mandolin_gourd 0.6987987 0.1085061 0.50 1 mandolin 2
candlestick_3 0.7007068 -2.1468933 0.33 1 candlestick 3
bobsled_4 0.7040955 -3.3050588 0.25 1 bobsled 4
waterwheel_4 0.7056044 1.0552635 0.25 1 waterwheel 4
cymbal_4 0.7081902 -0.3082975 0.25 1 cymbal 4
gondola_clipboard 0.7194366 1.6118202 0.50 1 gondola 2
tapestry_3 0.7219517 0.4322558 0.33 1 tapestry 3
mulch_compost 0.7230480 -5.3206956 0.50 1 mulch 2
cymbal_mallet 0.7378264 1.2515661 0.50 1 cymbal 2
candlestick_candle 0.7434295 -4.7766115 0.50 1 candlestick 2
pajamas_4 0.7487710 0.2540781 0.25 1 pajamas 4
saddle_figurine 0.7570973 0.5445203 0.50 1 saddle 2
caramel_amber 0.7618761 0.9414672 0.50 1 caramel 2
elbow_4 0.7624635 1.5676900 0.25 1 elbow 4
fox_4 0.7649781 2.3108962 0.25 1 fox 4
telescope_3 0.7697011 2.7948978 0.33 1 telescope 3
waterwheel_target 0.7793944 1.8686696 0.50 1 waterwheel 2
hoe_dustpan 0.7813166 1.9539678 0.50 1 hoe 2
sloth_donkey 0.7837115 2.3409808 0.50 1 sloth 2
sauerkraut_3 0.7875766 -1.2534834 0.33 1 sauerkraut 3
hoe_peg 0.7905833 1.1677296 0.50 1 hoe 2
corset_4 0.7907552 -0.8262486 0.25 1 corset 4
koala_panda 0.7967828 2.0371289 0.50 1 koala 2

Construct a 2PL with trimmed items

Construct trimmed dataframe for refitting 2PL model

d_wide_trimmed<- all_data %>%
  filter(itemId %in% unique(coefs_trimmed$itemId)) %>%
  select(itemId, correct, pid) %>%
  ungroup() %>%
  arrange(itemId) %>%
  ungroup() %>%
  pivot_wider(names_from=itemId, values_from=correct, values_fn = ~mean(.x)) %>%
  ungroup()

d_mat_all_trimmed <- d_wide_trimmed %>%
  select(-pid) %>%
  data.frame %>%
  data.matrix 

rownames(d_mat_all_trimmed) <- d_wide_trimmed$pid

Set model parameters

start.dim <- dim(d_mat_all_trimmed)[2]
mm.trimmed.string = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, 1, 0.5),
  PRIOR = (1-%d, d, norm, 0, 2), 
  LBOUND = (1-%g, g, 0.25), 
  UBOUND = (1-%g, g, 1), 
  LBOUND = (1-%d, a1, 0.0)' # why was this at .5?
  )
mm.trimmed = mirt.model(sprintf(mm.trimmed.string, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim))

Run basic 2PL model with priors specified above and guessing rate per item

First get guessing list

itemIdforIRTTrimmed <- all_data %>%
  filter(itemId %in% unique(coefs_trimmed$itemId)) %>%
  distinct(itemId, targetWord,numAFC) %>%
  arrange(itemId) %>%
  mutate(guess_rate = case_when( numAFC==2 ~0.5 ,
                                 numAFC==3 ~0.33 ,
                                 numAFC==4 ~0.25 )) 

list.guessing.trimmed = itemIdforIRTTrimmed$guess_rate
mod.vocab.2pl.trimmed <- mirt(d_mat_all_trimmed, model = mm.trimmed, itemtype = '2PL', guess = list.guessing.trimmed)
## 
Iteration: 1, Log-Lik: -42083.613, Max-Change: 4.70053
Iteration: 2, Log-Lik: -35150.399, Max-Change: 0.51004
Iteration: 3, Log-Lik: -35002.793, Max-Change: 0.21931
Iteration: 4, Log-Lik: -34929.706, Max-Change: 0.16336
Iteration: 5, Log-Lik: -34884.414, Max-Change: 0.13077
Iteration: 6, Log-Lik: -34856.092, Max-Change: 0.10435
Iteration: 7, Log-Lik: -34838.407, Max-Change: 0.08309
Iteration: 8, Log-Lik: -34827.382, Max-Change: 0.06599
Iteration: 9, Log-Lik: -34820.517, Max-Change: 0.05231
Iteration: 10, Log-Lik: -34816.247, Max-Change: 0.04139
Iteration: 11, Log-Lik: -34813.593, Max-Change: 0.03271
Iteration: 12, Log-Lik: -34811.944, Max-Change: 0.02583
Iteration: 13, Log-Lik: -34809.371, Max-Change: 0.00580
Iteration: 14, Log-Lik: -34809.325, Max-Change: 0.00433
Iteration: 15, Log-Lik: -34809.296, Max-Change: 0.00340
Iteration: 16, Log-Lik: -34809.251, Max-Change: 0.00073
Iteration: 17, Log-Lik: -34809.250, Max-Change: 0.00056
Iteration: 18, Log-Lik: -34809.250, Max-Change: 0.00045
Iteration: 19, Log-Lik: -34809.249, Max-Change: 0.00020
Iteration: 20, Log-Lik: -34809.249, Max-Change: 0.00013
Iteration: 21, Log-Lik: -34809.249, Max-Change: 0.00011
Iteration: 22, Log-Lik: -34809.249, Max-Change: 0.00011
Iteration: 23, Log-Lik: -34809.249, Max-Change: 0.00004
co_trimmed_model <- coef(mod.vocab.2pl.trimmed,simplify=TRUE, IRTpars = FALSE) # Get coeeficients
coefs_trimmed_model <- tibble::rownames_to_column(as.data.frame(co_trimmed_model$items),'itemId')
# coefs_trimmed_model %>%
  # arrange(a1) %>%
  # kable()
 hard_but_good <- coefs_trimmed_model %>%
  filter(d>quantile(d,.75)) %>%
  arrange(a1) %>%
  kable()
marginal_rxx(mod.vocab.2pl)
## [1] 0.9693657

Do our items load onto one factor mostly?

item_check = as.data.frame(summary(mod.vocab.2pl.trimmed))
##                           F    h2
## acorn_3               0.578 0.334
## acorn_4               0.645 0.416
## acorn_coconut         0.609 0.371
## acorn_key             0.537 0.289
## aloe_3                0.601 0.362
## aloe_4                0.712 0.506
## aloe_bracket          0.643 0.414
## aloe_cactus           0.662 0.438
## antenna_3             0.595 0.354
## antenna_4             0.610 0.373
## antenna_pinecone      0.527 0.278
## antenna_stem          0.597 0.356
## artichoke_3           0.501 0.251
## artichoke_4           0.631 0.398
## artichoke_leek        0.572 0.328
## bamboo_3              0.637 0.406
## bamboo_4              0.663 0.439
## bamboo_lumber         0.620 0.384
## bamboo_yacht          0.432 0.187
## barrel_3              0.616 0.379
## barrel_4              0.583 0.340
## barrel_bucket         0.561 0.315
## barrel_shelf          0.500 0.250
## blender_3             0.604 0.365
## blender_4             0.617 0.380
## blender_blind         0.506 0.256
## blender_mailbox       0.533 0.284
## blower_3              0.655 0.429
## blower_4              0.456 0.208
## blower_buggy          0.447 0.200
## blower_burner         0.558 0.312
## bobsled_3             0.649 0.421
## bobsled_dip           0.499 0.249
## bobsled_sidecar       0.466 0.217
## bouquet_3             0.718 0.516
## bouquet_4             0.709 0.503
## bouquet_bandanna      0.711 0.505
## bouquet_centerpiece   0.733 0.537
## buffet_3              0.598 0.357
## buffet_4              0.662 0.438
## buffet_counter        0.652 0.425
## buffet_crib           0.514 0.264
## bulldozer_3           0.474 0.224
## bulldozer_4           0.634 0.403
## bulldozer_orange      0.539 0.291
## bulldozer_tractor     0.599 0.359
## cake_3                0.568 0.323
## cake_4                0.610 0.372
## cake_dessert          0.455 0.207
## candlestick_egg       0.537 0.289
## caramel_3             0.537 0.289
## caramel_4             0.648 0.420
## caramel_olive         0.604 0.365
## carousel_3            0.540 0.292
## carousel_4            0.658 0.432
## carousel_carriage     0.562 0.316
## carousel_dish         0.557 0.310
## carrot_3              0.613 0.376
## carrot_4              0.611 0.373
## carrot_phone          0.582 0.338
## cassette_3            0.585 0.342
## cassette_4            0.598 0.357
## cassette_book         0.525 0.276
## cassette_tape         0.526 0.277
## cheese_3              0.668 0.447
## cheese_4              0.708 0.501
## cheese_butter         0.742 0.551
## cloak_3               0.653 0.427
## cloak_4               0.718 0.516
## cloak_cinnamon        0.605 0.366
## cloak_hood            0.599 0.359
## clothespin_3          0.501 0.251
## clothespin_4          0.451 0.204
## clothespin_toothpick  0.588 0.346
## clothespin_volleyball 0.581 0.337
## coaster_3             0.634 0.402
## coaster_4             0.589 0.347
## coaster_card          0.532 0.283
## coaster_painting      0.499 0.249
## cork_3                0.633 0.401
## cork_4                0.710 0.503
## cork_jersey           0.581 0.338
## cork_skateboard       0.623 0.388
## cornbread_3           0.575 0.331
## cornbread_4           0.529 0.280
## cornbread_corn        0.630 0.397
## corset_3              0.479 0.230
## corset_harness        0.699 0.489
## corset_mantle         0.492 0.242
## cymbal_3              0.446 0.198
## dumpling_3            0.568 0.323
## dumpling_4            0.524 0.274
## dumpling_meatball     0.452 0.204
## dumpling_rocket       0.660 0.436
## elbow_3               0.551 0.303
## elbow_arm             0.505 0.255
## fan_3                 0.573 0.329
## fan_4                 0.586 0.343
## flan_3                0.543 0.295
## flan_4                0.503 0.253
## flan_amplifier        0.583 0.340
## flan_fuse             0.580 0.337
## foam_3                0.595 0.354
## foam_4                0.675 0.456
## foam_float            0.718 0.515
## foam_quilt            0.515 0.265
## footbath_3            0.555 0.308
## footbath_4            0.657 0.431
## freezer_cooler        0.500 0.250
## fruitcake_3           0.547 0.299
## fruitcake_4           0.647 0.419
## fruitcake_dough       0.435 0.189
## fruitcake_fudge       0.572 0.327
## grate_reel            0.516 0.267
## gutter_3              0.514 0.264
## gutter_4              0.579 0.336
## gutter_almond         0.577 0.332
## gutter_filter         0.552 0.305
## hamster_3             0.673 0.453
## hamster_4             0.633 0.400
## hamster_rabbit        0.603 0.363
## hamster_tadpole       0.620 0.385
## headdress_4           0.452 0.205
## headdress_crown       0.417 0.174
## hedgehog_3            0.621 0.386
## hedgehog_4            0.648 0.420
## hedgehog_iguana       0.490 0.240
## hedgehog_owl          0.650 0.423
## hoe_3                 0.475 0.225
## hoe_4                 0.552 0.304
## hopscotch_3           0.628 0.394
## hopscotch_4           0.664 0.440
## hopscotch_chalk       0.619 0.383
## hopscotch_funnel      0.654 0.428
## horn_3                0.453 0.205
## horn_4                0.443 0.196
## horn_chin             0.460 0.211
## kimono_3              0.672 0.452
## kimono_4              0.579 0.335
## kimono_cardigan       0.633 0.401
## koala_4               0.608 0.370
## latch_3               0.618 0.381
## latch_4               0.629 0.395
## latch_lock            0.551 0.304
## latch_microwave       0.568 0.323
## locker_3              0.550 0.303
## locker_4              0.692 0.479
## locker_basket         0.615 0.379
## locker_cabinet        0.498 0.248
## lollipop_3            0.556 0.309
## lollipop_4            0.573 0.328
## lollipop_candy        0.614 0.377
## mandolin_3            0.499 0.249
## mandolin_mast         0.515 0.265
## map_4                 0.614 0.377
## map_marker            0.485 0.235
## map_ruby              0.476 0.227
## marshmallow_3         0.594 0.353
## marshmallow_4         0.539 0.291
## mulch_3               0.621 0.385
## mulch_4               0.607 0.368
## mulch_clarinet        0.497 0.247
## net_3                 0.687 0.472
## net_4                 0.732 0.536
## net_domino            0.620 0.384
## net_tee               0.713 0.509
## oatmeal_3             0.500 0.250
## oatmeal_4             0.543 0.295
## oatmeal_cereal        0.486 0.236
## oil_4                 0.428 0.183
## oil_gel               0.538 0.290
## oil_grain             0.646 0.418
## omelet_3              0.688 0.473
## omelet_4              0.663 0.439
## omelet_clay           0.632 0.400
## omelet_pancake        0.525 0.275
## otter_3               0.609 0.371
## otter_4               0.500 0.250
## otter_beaver          0.669 0.448
## otter_goose           0.425 0.181
## pajamas_3             0.595 0.354
## parsley_3             0.627 0.393
## parsley_4             0.665 0.442
## parsley_basil         0.498 0.248
## parsley_tiara         0.571 0.326
## pie_3                 0.545 0.297
## pie_4                 0.591 0.349
## pistachio_3           0.545 0.297
## pistachio_4           0.673 0.453
## pistachio_avocado     0.479 0.230
## pistachio_taco        0.489 0.239
## pitcher_3             0.720 0.518
## pitcher_4             0.790 0.624
## pitcher_batter        0.735 0.541
## pitcher_tumbleweed    0.574 0.329
## potato_3              0.547 0.299
## potato_4              0.608 0.369
## potato_pot            0.642 0.412
## prism_3               0.699 0.489
## prism_4               0.722 0.522
## prism_radar           0.687 0.473
## prism_subway          0.502 0.252
## prune_3               0.517 0.268
## prune_4               0.612 0.374
## prune_headrest        0.611 0.374
## puddle_3              0.609 0.371
## puddle_4              0.661 0.437
## puddle_baseball       0.581 0.337
## puddle_bubble         0.507 0.257
## pump_3                0.521 0.271
## pump_4                0.618 0.382
## pump_plug             0.606 0.368
## rice_3                0.604 0.365
## rice_4                0.540 0.292
## rice_dice             0.555 0.308
## saddle_3              0.559 0.312
## saddle_4              0.659 0.434
## saddle_handle         0.618 0.382
## sandbag_3             0.422 0.178
## sandbag_4             0.471 0.222
## sandbag_sandpaper     0.479 0.229
## sandbag_valve         0.566 0.320
## sauerkraut_potpourri  0.500 0.250
## scaffolding_3         0.537 0.288
## scaffolding_4         0.633 0.400
## scaffolding_satellite 0.649 0.421
## scaffolding_veil      0.537 0.288
## scoop_3               0.506 0.256
## scoop_4               0.544 0.296
## scoop_sauce           0.505 0.255
## scoop_statue          0.467 0.218
## seagull_3             0.617 0.381
## seagull_4             0.501 0.251
## seagull_pigeon        0.635 0.404
## ship_3                0.577 0.333
## ship_4                0.557 0.311
## ship_boat             0.635 0.403
## shower_3              0.635 0.404
## shower_4              0.650 0.423
## shower_soap           0.511 0.261
## shower_toe            0.553 0.306
## silverware_3          0.594 0.353
## silverware_4          0.671 0.450
## silverware_garlic     0.554 0.307
## silverware_trophy     0.633 0.401
## sink_3                0.643 0.414
## sink_4                0.575 0.331
## sink_stair            0.657 0.432
## sink_toilet           0.603 0.364
## ski_3                 0.580 0.337
## ski_4                 0.657 0.432
## ski_ice               0.613 0.376
## ski_suitcase          0.620 0.384
## sloth_3               0.469 0.220
## sloth_4               0.443 0.196
## sloth_clam            0.495 0.245
## snail_3               0.606 0.367
## snail_4               0.609 0.371
## snail_worm            0.614 0.377
## sorbet_3              0.496 0.246
## sorbet_4              0.542 0.294
## sorbet_palette        0.497 0.247
## spatula_3             0.514 0.264
## spatula_4             0.433 0.188
## spatula_lava          0.593 0.352
## sprinkler_3           0.576 0.332
## sprinkler_4           0.637 0.406
## sprinkler_sparkler    0.443 0.196
## squash_3              0.556 0.309
## squash_4              0.470 0.221
## squash_branch         0.532 0.283
## squash_pumpkin        0.618 0.382
## squirrel_3            0.595 0.354
## squirrel_4            0.499 0.249
## squirrel_eagle        0.546 0.298
## squirrel_monkey       0.471 0.222
## stew_3                0.609 0.371
## stew_mixer            0.564 0.318
## stew_soup             0.642 0.412
## stretcher_3           0.700 0.491
## stretcher_4           0.649 0.421
## stretcher_drill       0.649 0.421
## stump_3               0.665 0.443
## stump_4               0.593 0.352
## stump_bookshelf       0.721 0.520
## stump_log             0.699 0.488
## sunflower_4           0.555 0.308
## swordfish_3           0.585 0.342
## swordfish_4           0.625 0.391
## swordfish_catfish     0.746 0.557
## swordfish_leopard     0.559 0.312
## taillight_4           0.528 0.279
## taillight_luggage     0.674 0.455
## tapestry_4            0.594 0.352
## tapestry_backdrop     0.610 0.372
## tapestry_leggings     0.512 0.262
## teabag_3              0.523 0.274
## teabag_fingerprint    0.605 0.366
## teabag_tea            0.530 0.281
## teapot_brake          0.603 0.363
## teapot_teacup         0.534 0.285
## telescope_4           0.542 0.294
## telescope_tripod      0.450 0.203
## thermos_3             0.667 0.445
## thermos_4             0.588 0.345
## thermos_firewood      0.659 0.434
## thermos_oilcan        0.664 0.441
## treasure_3            0.594 0.353
## treasure_4            0.446 0.199
## treasure_gift         0.483 0.233
## trumpet_3             0.509 0.259
## trumpet_4             0.673 0.453
## trumpet_flute         0.557 0.310
## trumpet_violin        0.454 0.206
## tulip_3               0.507 0.257
## tulip_4               0.535 0.287
## tulip_glove           0.666 0.444
## tulip_rose            0.569 0.323
## turbine_3             0.610 0.373
## turbine_4             0.568 0.323
## turbine_generator     0.646 0.417
## turbine_tab           0.684 0.468
## turkey_3              0.478 0.228
## turkey_4              0.617 0.381
## turkey_goat           0.693 0.481
## turtle_3              0.524 0.275
## turtle_4              0.476 0.227
## turtle_horse          0.642 0.413
## tuxedo_stroller       0.521 0.271
## tuxedo_suit           0.521 0.271
## typewriter_3          0.523 0.273
## typewriter_4          0.697 0.486
## typewriter_printer    0.510 0.260
## typewriter_sunglasses 0.636 0.404
## watermelon_3          0.492 0.242
## waterwheel_3          0.580 0.337
## waterwheel_pinwheel   0.640 0.410
## 
## SS loadings:  115.431 
## Proportion Var:  0.343 
## 
## Factor correlations: 
## 
##   F
## F 1
item_check_2 = rownames_to_column(item_check)

item_info <- item_check_2 %>%
  as_tibble() %>%
  rename(itemId = rowname) %>%
  mutate(targetWord = str_split_fixed(itemId, '_',2)[,1]) %>%
    mutate(numAFC = str_split_fixed(itemId, '_',2)[,2]) %>%
  left_join(word1_aoa) %>%
  group_by(targetWord) %>%
  mutate(avg_h2 = mean(h2)) %>%
  ungroup() %>%
  mutate(targetWordOrdered = fct_reorder(targetWord, avg_h2))

Here, each dot is a trial type, and the y-axis the factor loading for that trial – looks like we have a pretty even spead from .4 to .8

ggplot(data = item_info, aes(x=targetWordOrdered, y=F, col=AoA_Est_Word1)) +
  theme_few(base_size = 12) +
  geom_point()  +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size=4), aspect.ratio = .4) 

  # facet_wrap(~numAFC, nrow=1)

Now do some pre-processing to get individual theta estiamtes

Create sandbox participant who has seen all trials

list.items <- itemIdforIRTTrimmed$itemId
df.sandbox.participant <- 
  tibble(pid = "sandbox", itemId = list.items, correct = 1)

Create trial ID structure for matrix

duplicate_ids <- school_data_multi_afc_cleaned %>%
  group_by(pid) %>%
  summarize(duplicates = length(unique(runId))) %>%
  filter(duplicates>1) 
ceiling_kids_multiafc <- school_data_multi_afc_cleaned %>%
  group_by(pid) %>%
  summarize(pc = mean(correct)) %>%
  filter(pc>.95) 
df.rocketship.roar.trials.clean <- school_data_multi_afc_cleaned %>% 
  filter(pid %in% completed_twice$pid) %>%
  filter(!pid %in% ceiling_kids_multiafc$pid) %>%
  filter(!pid %in% duplicate_ids$pid) %>%
  filter(studyId %in%  c("school-multiAFC"), itemType == "test")  %>%
  left_join(itemIdforIRT, by=c('targetWord','numAFC')) %>%
  filter(itemId %in% list.items) %>%
  dplyr :: distinct(pid, runId, studyId, start_time, numAFC, targetWord, itemId, correct, rt) 
df.rocketship.2afc.roar.trials.clean <- school_data_2afc_cleaned %>% 
  mutate(itemId = paste(word1, word2, sep = "_")) %>%
  filter(itemId %in% list.items) %>%
  dplyr :: distinct(pid, runId, studyId, start_time, numAFC, word1, word2, itemId, correct, rt) 

There were 155 trials in the 2AFC version.

Make each of the data structures for getting thetas

df.multiAFC.resp <- df.rocketship.roar.trials.clean %>% 
  select(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_fn = mean, values_from = correct, values_fill = NA)

df.2AFC.resp <- df.rocketship.roar.trials.clean %>% 
  filter(numAFC == 2) %>% 
  distinct(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_fn = mean, values_from = correct, values_fill = NA)

df.3AFC.resp <- df.rocketship.roar.trials.clean %>% 
  filter(numAFC == 3) %>% 
  distinct(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_fn = mean, values_from = correct, values_fill = NA)

df.4AFC.resp <- df.rocketship.roar.trials.clean %>% 
  filter(numAFC == 4) %>% 
  distinct(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_fn = mean, values_from = correct, values_fill = NA)

Make the data structures for getting thetas from 2afc

duplicates_first_admin <- df.rocketship.2afc.roar.trials.clean %>%
  group_by(pid) %>%
  summarize(duplicates = length(unique(runId))>1) %>%
  filter(duplicates==TRUE)

Eliminate kids who don’t have enough items

kids_not_enough_items <- df.rocketship.2afc.roar.trials.clean %>%
  filter(!pid %in% duplicates_first_admin$pid) %>%
  distinct(pid, itemId, correct)  %>%
  filter(itemId %in% list.items) %>%
  group_by(pid) %>%
  summarize(items_per_kid = length(unique(itemId))) %>%
  filter(items_per_kid<10)

Or kids who end up being at ceiling in this subset

kids_ceiling <- df.rocketship.2afc.roar.trials.clean %>%
  filter(!pid %in% duplicates_first_admin$pid) %>%
  distinct(pid, itemId, correct)  %>%
  filter(itemId %in% list.items) %>%
  group_by(pid) %>%
  summarize(pc = mean(correct)) %>%
  filter(pc>.95)
df.2AFC.first_admin <- df.rocketship.2afc.roar.trials.clean %>% 
  filter(!pid %in% kids_ceiling$pid) %>%
  filter(!pid %in% duplicates_first_admin$pid) %>%
  filter(!pid %in% kids_not_enough_items$pid) %>%
  filter(pid %in% unique(df.rocketship.roar.trials.clean$pid)) %>%
  distinct(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_from = correct, values_fn = mean, values_fill = NA)

Get theta score based on these reseponse pattenrs

getTheta <- function(df){
  thetas <- fscores(mod.vocab.2pl.trimmed, method = "ML", response.pattern = df %>% select(-pid), max_theta = 8, min_theta = -8, theta_lim = c(-8, 8))
  return (data.frame(thetas)$F)
}

Overall and based on 234afc separately

df.thetas.students <- df.multiAFC.resp %>% 
    dplyr :: select(pid) %>% 
    add_column(theta.multiAFC = getTheta(df.multiAFC.resp), 
               theta.2AFC = getTheta(df.2AFC.resp),
               theta.3AFC = getTheta(df.3AFC.resp),
               theta.4AFC = getTheta(df.4AFC.resp)
               )
sum(df.multiAFC.resp$pid==df.2AFC.resp$pid)
## [1] 850

Overall and based on 234afc separately

df.thetas.students.first_admin <- df.2AFC.first_admin %>% 
    dplyr :: select(pid) %>% 
    add_column(theta.2AFC = getTheta(df.2AFC.first_admin))

Now get thetas for 2AFC kids with same model

dfs_all <- df.thetas.students.first_admin %>%
  rename(theta.2AFC.initial = theta.2AFC) %>%
  right_join(df.thetas.students, by=c('pid')) %>%
  left_join(pid_by_age) %>%
  mutate(age_group = as.numeric(age_group)) %>%
  filter(!is.na(age_group))

Students who did both multiAFC and 2AFC assessments for each age group

ggplot(data=dfs_all, aes(x=theta.2AFC.initial, y=theta.multiAFC, col=age_group)) +
  geom_point(alpha=.6) +
  stat_cor(cor.coef.name = 'r', aes(label = ..r.label..))  +
  geom_smooth(span=20) +
  facet_grid(~age_group) +
  theme(legend.position='none')

ggplot(data=dfs_all,  aes(x=theta.2AFC.initial, y=theta.multiAFC, col=age_group)) +
  geom_point(alpha=.6) +
  stat_cor(cor.coef.name = 'r', aes(label = ..r.label..))  +
  geom_smooth(span=20) +
  theme(legend.position='none')

Correlation with age isn’t thaaat bad

ggplot(data=dfs_all, aes(x=age_numeric, y=theta.multiAFC, col=age_group)) +
  geom_point(alpha=.6) +
  stat_cor(cor.coef.name = 'r', aes(label = ..r.label..))  +
  geom_smooth(span=20) +
  # facet_grid(~age_group) +
  theme(legend.position='none') +
  ggtitle('Theta by age')

Construct model & estimate reliability with only this subset

Preprocess data structures

Kick out items and kids at ceiling in this subset

ceiling <- all_data %>%
  filter(pid %in% dfs_all$pid) %>%
  group_by(itemId) %>%
  summarise(pc = mean(correct)) %>%
  filter(pc==1)
ceiling_kids <- all_data %>%
  filter(pid %in% dfs_all$pid) %>%
  group_by(pid) %>%
  summarise(pc = mean(correct)) %>%
  filter(pc>.9)
# runs_by_pid<- all_data %>%
#   filter(pid %in% dfs_all$pid) %>%
#   filter(!pid %in% ceiling_kids$pid) %>%
#   filter(itemId %in% unique(coefs_trimmed$itemId)) %>%
#   filter(!itemId %in% unique(ceiling$itemId)) %>%
#   mutate(pid_unique = paste0(pid, '_', administration)) %>%
#   group_by(pid_unique) %>%
#   summarize(num_runs = length(unique(runId)))
d_wide_school<- all_data %>%
  filter(pid %in% dfs_all$pid) %>%
  filter(!pid %in% ceiling_kids$pid) %>%
  filter(itemId %in% unique(coefs_trimmed$itemId)) %>%
  filter(!itemId %in% unique(ceiling$itemId)) %>%
  mutate(pid_unique = paste0(pid, '_', administration)) %>%
  select(itemId, correct, pid_unique) %>%
  ungroup() %>%
  arrange(itemId) %>%
  ungroup() %>%
  pivot_wider(names_from=itemId, values_from=correct, values_fn = ~mean(.x)) %>%
  ungroup()

d_mat_school <- d_wide_school %>%
  select(-pid_unique) %>%
  data.frame %>%
  data.matrix 

rownames(d_mat_school) <- d_wide_school$pid_unique

assert_that(sum(d_wide_school$pid_unique!=rownames(d_mat_school))==0)
## [1] TRUE

Make guess list

items_d_mat_school = colnames(d_mat_school)
itemIdforIRTGuess <- itemIdforIRT %>%
  filter(itemId %in% items_d_mat_school) %>%
  arrange(itemId) %>%
  mutate(guess_rate = case_when( numAFC==2 ~0.5 ,
                                 numAFC==3 ~0.33 ,
                                 numAFC==4 ~0.25 )) 

list.guessing = itemIdforIRTGuess$guess_rate

Construct 2PL model with rocketship kids only

start.dim <- dim(d_mat_school)[2]
mm.string = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, 1, 0.5),
  PRIOR = (1-%d, d, norm, 0, 2), 
  LBOUND = (1-%d, a1, 0.0)' # why was this at .5?
  )
mm = mirt.model(sprintf(mm.string, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim))
mod.vocab.2pl.rocketship <- mirt(d_mat_school, model = mm, itemtype = '2PL', guess=list.guessing)
## 
Iteration: 1, Log-Lik: -22889.330, Max-Change: 4.46130
Iteration: 2, Log-Lik: -18815.557, Max-Change: 0.46271
Iteration: 3, Log-Lik: -18746.126, Max-Change: 0.16113
Iteration: 4, Log-Lik: -18726.956, Max-Change: 0.10280
Iteration: 5, Log-Lik: -18718.841, Max-Change: 0.07003
Iteration: 6, Log-Lik: -18715.118, Max-Change: 0.04766
Iteration: 7, Log-Lik: -18713.377, Max-Change: 0.03260
Iteration: 8, Log-Lik: -18712.560, Max-Change: 0.02239
Iteration: 9, Log-Lik: -18712.175, Max-Change: 0.01535
Iteration: 10, Log-Lik: -18711.868, Max-Change: 0.00491
Iteration: 11, Log-Lik: -18711.849, Max-Change: 0.00336
Iteration: 12, Log-Lik: -18711.841, Max-Change: 0.00232
Iteration: 13, Log-Lik: -18711.834, Max-Change: 0.00071
Iteration: 14, Log-Lik: -18711.833, Max-Change: 0.00050
Iteration: 15, Log-Lik: -18711.833, Max-Change: 0.00035
Iteration: 16, Log-Lik: -18711.833, Max-Change: 0.00012
Iteration: 17, Log-Lik: -18711.833, Max-Change: 0.00012
Iteration: 18, Log-Lik: -18711.833, Max-Change: 0.00010

Marginal and empirical reliability in this sample

Different ways of estimating reliability

marginal_rxx(mod.vocab.2pl.rocketship)
## [1] 0.9569501
fs <- fscores(mod.vocab.2pl.rocketship, full.scores.SE=TRUE)
head(fs)
##               F      SE_F
## [1,] -0.0633073 0.4993017
## [2,]  0.5219550 0.5768861
## [3,] -1.6932029 0.4699129
## [4,] -0.1377871 0.4979300
## [5,]  0.6879826 0.5387015
## [6,] -0.3753503 0.4952175
empirical_rxx(fs)
##         F 
## 0.6275197

Check whcih items are left

final_items_left <- itemIdforIRTTrimmed %>%
  filter(numAFC==4) %>%
  left_join(word1_aoa, by=c('targetWord')) %>%
  arrange(AoA_Est_Word1) 

kable(final_items_left)
itemId targetWord numAFC guess_rate AoA_Est_Word1
carrot_4 carrot 4 0.25 2.740000
cake_4 cake 4 0.25 3.260000
pie_4 pie 4 0.25 3.670000
rice_4 rice 4 0.25 3.720000
marshmallow_4 marshmallow 4 0.25 3.800000
lollipop_4 lollipop 4 0.25 3.890000
turkey_4 turkey 4 0.25 3.950000
turtle_4 turtle 4 0.25 4.170000
cheese_4 cheese 4 0.25 4.330000
hamster_4 hamster 4 0.25 4.370000
squirrel_4 squirrel 4 0.25 4.440000
sink_4 sink 4 0.25 4.470000
oatmeal_4 oatmeal 4 0.25 4.500000
shower_4 shower 4 0.25 4.720000
horn_4 horn 4 0.25 4.840000
potato_4 potato 4 0.25 4.840000
scoop_4 scoop 4 0.25 5.140000
ship_4 ship 4 0.25 5.330000
puddle_4 puddle 4 0.25 5.370000
seagull_4 seagull 4 0.25 5.420000
otter_4 otter 4 0.25 5.470000
sprinkler_4 sprinkler 4 0.25 5.530000
map_4 map 4 0.25 5.600000
bulldozer_4 bulldozer 4 0.25 5.610000
fan_4 fan 4 0.25 5.630000
snail_4 snail 4 0.25 5.790000
hopscotch_4 hopscotch 4 0.25 5.800000
silverware_4 silverware 4 0.25 5.890000
acorn_4 acorn 4 0.25 5.950000
koala_4 koala 4 0.25 6.000000
sunflower_4 sunflower 4 0.25 6.000000
pump_4 pump 4 0.25 6.060000
treasure_4 treasure 4 0.25 6.060000
foam_4 foam 4 0.25 6.150000
trumpet_4 trumpet 4 0.25 6.280000
cassette_4 cassette 4 0.25 6.370000
cornbread_4 cornbread 4 0.25 6.400000
pitcher_4 pitcher 4 0.25 6.420000
saddle_4 saddle 4 0.25 6.420000
omelet_4 omelet 4 0.25 6.500000
caramel_4 caramel 4 0.25 6.580000
carousel_4 carousel 4 0.25 6.680000
ski_4 ski 4 0.25 6.680000
typewriter_4 typewriter 4 0.25 6.740000
blender_4 blender 4 0.25 6.840000
squash_4 squash 4 0.25 6.940000
cloak_4 cloak 4 0.25 6.950000
telescope_4 telescope 4 0.25 6.950000
net_4 net 4 0.25 7.000000
antenna_4 antenna 4 0.25 7.050000
tulip_4 tulip 4 0.25 7.148876
spatula_4 spatula 4 0.25 7.320000
oil_4 oil 4 0.25 7.330000
buffet_4 buffet 4 0.25 7.580000
stump_4 stump 4 0.25 7.580000
barrel_4 barrel 4 0.25 7.720000
gutter_4 gutter 4 0.25 7.790000
clothespin_4 clothespin 4 0.25 7.830000
sandbag_4 sandbag 4 0.25 7.830000
prune_4 prune 4 0.25 7.850000
coaster_4 coaster 4 0.25 7.890000
latch_4 latch 4 0.25 8.000000
locker_4 locker 4 0.25 8.220000
fruitcake_4 fruitcake 4 0.25 8.280000
thermos_4 thermos 4 0.25 8.290000
blower_4 blower 4 0.25 8.320000
sloth_4 sloth 4 0.25 8.370000
dumpling_4 dumpling 4 0.25 8.500000
footbath_4 footbath 4 0.25 8.530000
hedgehog_4 hedgehog 4 0.25 8.630000
bouquet_4 bouquet 4 0.25 8.720000
hoe_4 hoe 4 0.25 8.720000
pistachio_4 pistachio 4 0.25 8.740000
aloe_4 aloe 4 0.25 8.950000
parsley_4 parsley 4 0.25 8.950000
cork_4 cork 4 0.25 9.050000
headdress_4 headdress 4 0.25 9.060000
mulch_4 mulch 4 0.25 9.220000
bamboo_4 bamboo 4 0.25 9.280000
swordfish_4 swordfish 4 0.25 9.330000
taillight_4 taillight 4 0.25 9.330000
stretcher_4 stretcher 4 0.25 9.390000
prism_4 prism 4 0.25 9.781155
artichoke_4 artichoke 4 0.25 10.000000
tapestry_4 tapestry 4 0.25 10.950000
turbine_4 turbine 4 0.25 11.060000
scaffolding_4 scaffolding 4 0.25 11.220000
kimono_4 kimono 4 0.25 11.370000
sorbet_4 sorbet 4 0.25 12.000000
flan_4 flan 4 0.25 12.650000

```