Setup
Load data
- Load bing kids on multiafc
Make plots of overall accuracy correlations
Estimate thetas for each kid and correlate
Construct a 2PL model where guess rate is specificed
- Estimate reliabilty of this model
Trim off items with low slopes & rerunning 2pl
Construct a 2PL with trimmed items
Do our items load onto one factor mostly?
Now do some pre-processing to get individual theta estiamtes
- Get theta score based on these reseponse pattenrs
- Now get thetas for 2AFC kids with same model
Construct model & estimate reliability with only this subset
Preprocess data structures
Marginal and empirical reliability in this sample
- Check whcih items are left

Setup

library(tidyverse)
library(lubridate)
library(ggthemes)
library(mirt)
library(knitr)
library(dplyr)
library(ggpubr)
library(assertthat)

Load data

Read in aoa and other meta

item_meta <- read_csv(file = here::here('data/preprocessed_data/exp1_all_trials2023-04-11.csv'))

word1_aoa <- item_meta %>%
  distinct(Word1, AoA_Est_Word1)  %>%
  rename(targetWord = Word1)

Load 2afc data, which has already been cleaned

school_data_2afc_cleaned <- read_csv(file = here::here('data/preprocessed_data/all_preprocessed_data.csv')) %>%
  mutate(word1 = str_replace(word1, ' ', '.')) %>%
  mutate(word2 = str_replace(word2, ' ', '.')) %>%
  mutate(item_pair = paste0(word1,'_',word2)) %>%
  mutate(numAFC = 2) %>%
  mutate(sub_id = pid, numAFC = as.character(numAFC)) %>%
  filter(cohort=='school') %>%
  mutate(administration = 'initial')

Load meta data for 2afc participants

school_2afc_meta <- read_csv(here::here('data/school_meta_data/cs01-metadata-age.csv')) %>%
  as_tibble() %>%
  select(pid, age) %>%
  mutate(age_numeric = age, age_group = age) %>% # don't have precise age 
  select(-age) %>%
  filter(pid %in% unique(school_data_2afc_cleaned$pid)) %>%
  mutate(age_numeric = as.numeric(age_numeric))

We have 1606 rocketship children who completed the 2afc version.

There were 393 items in this initial version, including CLIP and DAIVT items.

Multiafc version

school_data_multi_afc <- read_csv(here::here('data/rocketship_data/multi-afc-with-meta.csv'))

Get rid of participants who missed more than 1 catch trial (out of three)

catch_low <- school_data_multi_afc %>%
  group_by(pid, itemType) %>%
  summarize(pc = mean(correct), num_catch = n()) %>%
  filter(itemType=='catch') %>%
  filter(pc <.5)

school_data_multi_afc_cleaned <- school_data_multi_afc %>%
  filter(!pid %in% catch_low$pid) %>% # exclude low acc kids
  filter(!is.na(as.numeric(age_group))) %>% # exclude adults
  filter(itemType=='test') %>% # only test trials
  filter(completed==TRUE) %>%
  mutate(cohort='school')
  
adults_multiafc_cleaned <- school_data_multi_afc %>%
  filter(!pid %in% catch_low$pid) %>% # exclude low acc kids
  filter(!pid %in% c('wm-try')) %>% # test pid
  filter(age_group == 'Adult') %>% # include adults
  filter(itemType=='test') %>% # only test trials
  filter(completed==TRUE)

We have 1428 rocketship who completed the multiafc version.

Load bing kids on multiafc

Load and preprocess bing demographics

bing_demo <- read_csv(here::here('data/validation_data/demographics.csv')) %>%
    as_tibble %>%
    select(SubID, `Age (months)`, Exclude, Date) %>%
    rename(age_in_months = `Age (months)`) %>%
    rename(pid = SubID) %>%
    mutate(age_numeric = age_in_months/12) %>%
    mutate(age_group = as.numeric(floor(age_numeric))) %>%
    select(pid, age_group, age_numeric)

bing_multiafc<- read_csv(here::here('data/validation_data/preprocessed_data/all_bing_data_for_item_info.csv'))  %>%
  mutate(cohort = 'bing') %>%
  filter(itemType == 'test') %>%
  select(-block) 

all_kids_multiafc <- school_data_multi_afc_cleaned %>%
  full_join(bing_multiafc)

We have 65 bing kids who completed the multiafc version.

We have 1493 rocketship + bing kids who completed the multiafc version.

Filter out the 2afc data so that we’re only including the CLIP items.

school_data_2afc_cleaned_clip_only <- school_data_2afc_cleaned %>%
  filter(word1 %in% unique(school_data_multi_afc$targetWord))

There were 393 items in this initial version, including CLIP and DAIVT items.

There were 218 items in this initial version, including only CLIP items.

Join together pids from both administrations

completed_twice_pids <- school_data_2afc_cleaned_clip_only %>%
  filter(pid %in% unique(school_data_multi_afc_cleaned$pid) ) %>%
  distinct(pid)

There are 871 kids who completed both versions.

Compute correct by pid

correct_by_pid <- school_data_2afc_cleaned %>%
  group_by(pid) %>%
  summarize(prop_correct_2afc = mean(correct))

Meta data for joining back

pid_by_age <- school_data_multi_afc_cleaned %>%
  mutate(age_numeric = as.numeric(age_numeric), age_group = as.numeric(age_group)) %>%
  distinct(pid, age_group, age_numeric) %>%
  full_join(school_2afc_meta) %>%
  full_join(bing_demo)

Compute data structure with accuracy overall for repeat kids (don’t get bing kids here)

correct_by_pid_by_multi_afc <- school_data_multi_afc_cleaned %>%
  mutate(age_group = as.numeric(age_group)) %>%
  group_by(pid, age_group) %>%
  summarize(prop_correct_overall = mean(correct)) 

completed_twice_overall <- correct_by_pid %>% # 2afc initial
  left_join(correct_by_pid_by_multi_afc, by=c('pid')) %>% # join overall multiafc
  filter(!is.na(prop_correct_overall))

Make data structure now with multiafc

correct_by_pid_by_afc <- school_data_multi_afc_cleaned %>%
  group_by(pid, numAFC, age_group) %>%
  summarize(prop_correct_nafc = mean(correct))

Make it wider to join

correct_by_pid_by_afc_wide <- correct_by_pid_by_afc %>%
  rowwise() %>%
  mutate(numAFC_string = paste0('num',numAFC)) %>%
  select(-numAFC) %>%
  pivot_wider(names_from = numAFC_string, values_from = 'prop_correct_nafc')

Now join back with 2afc data, only get kids in both

completed_twice <- correct_by_pid %>%
  left_join(correct_by_pid_by_afc) %>%
  filter(!is.na(prop_correct_nafc))

Make a long version

completed_twice_long <- correct_by_pid %>%
  left_join(correct_by_pid_by_afc_wide) %>%
  filter(!is.na(num2))

Make plots of overall accuracy correlations

ggplot(data=completed_twice_overall %>% mutate(age_group = as.numeric(age_group)), aes(x=prop_correct_2afc, y=prop_correct_overall, col=age_group)) +
  geom_point(alpha=.7) +
  geom_smooth() +
  theme_few(base_size=12) +
  ggtitle('Initial (2afc) vs. second (multiafc) visual vocab admin') +
  ggpubr::stat_cor(size=3) +
  xlab('Initial 2afc accuracy') +
  ylab('Second multiaf accuracy (overall)') +
  facet_wrap(~age_group)

Same when we look at pc correlations overall, doesn’t seem to be about some kids always being at ceiling – there’s a range

ggplot(data=completed_twice_overall %>% mutate(age_group = as.numeric(age_group)), aes(x=prop_correct_2afc, y=prop_correct_overall, col=age_group)) +
  geom_point(alpha=.7) +
  geom_smooth() +
  theme_few(base_size=12) +
  ggtitle('Initial (2afc) vs. second (multiafc) visual vocab admin') +
  ggpubr::stat_cor(size=3) +
  xlab('Initial 2afc accuracy') +
  ylab('Second multiafc accuracy (overall)')

Correlations between first and second administration aren’t that high, and it doesn’t matter which AFC trials we look at

ggplot(data=completed_twice, aes(x=prop_correct_2afc, y=prop_correct_nafc, col=as.numeric(age_group))) +
  geom_point(alpha=.6) +
  geom_smooth() +
  theme_few(base_size=16) +
  ggtitle('Initial vs. second adminstration by AFC') +
  facet_wrap(~numAFC) +
  xlab('Initial 2afc acc') +
  ylab('Second multiafc acc') +
  stat_cor() +
  theme(legend.position='none')

Estimate thetas for each kid and correlate

OK, so maybe these correlations are low because there is variability in the items, and we want to see what it’s likke when we estimate thetas for each kids.

To do so, let’s go ahead and use ALL of the data to fit a model predicting thetas for all kids together

First read back in prior trial level data and munge it

Use preprocessed data from other children/adults who have taken any version to estimate thetas for each child.

We need “item_ids” to be consistent across the datasets, so we have so munging to do

First get meta info on distractors for 2afc multi-afc hard trials so we can merge across the same trial tyope from both experiments

multiafc_distractors = read_csv(here::here('data/preprocessed_data/exp1_all_stimuli_april_2023.csv'))  %>%
  mutate(targetWord = Word1) %>%
  filter(trial_type == 'hard')  %>%
  select(targetWord, Word2)

Load all 2afc data in prolific/school cohort and make word1_word2 trialId

all_2afc_data <- read_csv(file = here::here('data/preprocessed_data/all_preprocessed_data.csv')) %>%
  filter(task=='test_response') %>% # only test trials should be true
  filter(completed==TRUE) %>% # completed the assessment
  filter(cohort %in% c('school','prolific')) %>%
  filter(word1 %in% multiafc_distractors$targetWord) %>% # no daivt distractors
  mutate(itemId = paste(word1, word2, sep = "_")) %>%
  mutate(numAFC = 2) %>%
  rename(targetWord = word1) %>%
  mutate(administration = '2afc') %>%
  select(pid, targetWord, itemId, correct, numAFC, administration, cohort)

All multiafc kid data - not including adults right now as they just make the ceiling problem worse.

Now munge data for identical 2afc trials from the multiafc experiment to have the same itemID format (word1_word2)

all_multiafc_data_2afc <- all_kids_multiafc %>%
  filter(itemType=='test') %>%
  filter(numAFC==2) %>%
  right_join(multiafc_distractors) %>%
  mutate(itemId = paste(targetWord, Word2, sep = "_"))  %>%
  mutate(administration = 'multiafc') %>%
  select(pid, targetWord, itemId, correct, numAFC, administration, cohort)

Make format for multiafc (word1_numafc) for other trials and then merge

kid_data_multi_afc_munged <- all_kids_multiafc %>%
  mutate(administration = 'multiafc') %>%
  filter(numAFC > 2) %>%
  mutate(itemId = paste(targetWord, numAFC, sep = "_")) %>%
  select(pid, targetWord, itemId, correct, numAFC, administration, cohort) %>%
  full_join(all_multiafc_data_2afc)

Merge multiafc and 2afc trial level data with same format

all_data <- kid_data_multi_afc_munged %>%
  full_join(all_2afc_data)

Sanity check numbers and participants

all_data %>%
  group_by(administration, cohort) %>%
  summarize(num_participants = length(unique(pid))) %>%
  kable()

administration	cohort	num_participants
2afc	school	1580
multiafc	bing	65
multiafc	school	1428

Sanity check how much data we have by items

data_by_item <- all_data %>%
  group_by(itemId) %>%
  summarize(num_responses = length(unique(pid)))

Should have no items with less than 100 responses

what <- data_by_item %>% filter(num_responses<100)

check <- all_data %>%
  filter(itemId %in% what$itemId)

Once bunch of items that were in both versions, other clusters from the multiafc versions only

hist(data_by_item$num_responses)

Get this itemID list for merging after running the models

Eliminate items, people that are nearly at ceiling in this sample

ceiling_people<- all_data %>%
  select(pid, correct) %>%
  group_by(pid) %>%
  summarize(prop_correct = mean(correct), num_responses = n()) %>%
  filter(prop_correct>.98)

This doesn’t eliminate any data right now but lots of items are close to the 200 threshold, ugh

not_enough_data_by_item <- all_data %>%
  group_by(itemId) %>%
  summarize(trials_per_id = length(unique(pid))) %>%
  filter(trials_per_id<175)

Eliminate people that didn’t complete enough trials with these items (not sure if we should do this since we’re estimating thetas with other data?) It’s about 100 people.

not_enough_data_by_person <- all_data %>%
  filter(administration=='2afc') %>%
  filter(!itemId %in% not_enough_data_by_item$itemId) %>%
  group_by(pid) %>%
  summarize(trials_per_id = length(unique(itemId))) %>%
  filter(trials_per_id<20)

ceiling_items<- all_data %>%
  filter(!pid %in% ceiling_people$pid) %>%
  filter(!pid %in% not_enough_data_by_person$pid) %>%
  select(itemId, correct) %>%
  group_by(itemId) %>%
  summarize(prop_correct = mean(correct), num_responses = n()) %>%
  filter(prop_correct>.97)

cleaned data

cleaned_data <- all_data %>%
  filter(!itemId %in% ceiling_items$itemId) %>%
  filter(!itemId %in% not_enough_data_by_item$itemId) %>%
  filter(!pid %in% ceiling_people$pid) %>%
  filter(!pid %in% not_enough_data_by_person$pid)

itemIdforIRT <- cleaned_data %>%
  distinct(targetWord, numAFC, itemId)

We have FOUR trial types per word – 2AFC hard 2AFC easy 3AFC 4AFC

trials_per_word <- itemIdforIRT %>%
  group_by(targetWord) %>%
  summarize(numAFC = length(unique(itemId)))

Make structure for IRT models

This structure aggregates across administrations for each participant, e.g., if they got a trial right on one take and wrong on another the value is .5

d_wide_all<- cleaned_data %>%
  select(itemId, correct, pid) %>%
  ungroup() %>%
  arrange(itemId) %>%
  ungroup() %>%
  pivot_wider(names_from=itemId, values_from=correct, values_fn = ~mean(.x)) %>%
  ungroup()

d_mat_all <- d_wide_all %>%
  select(-pid) %>%
  data.frame %>%
  data.matrix 

rownames(d_mat_all) <- d_wide_all$pid

Let’s just look at difficulties by item in this subset that we’re modeling

by_item <- cleaned_data %>%
  group_by(targetWord, numAFC) %>%
  summarize(prop_correct = mean(correct)) %>%
  left_join(word1_aoa) %>%
  ungroup() %>%
  mutate(targetWord = fct_reorder(targetWord, -prop_correct))

ggplot(data = by_item, aes(x=targetWord, y=prop_correct, col=AoA_Est_Word1)) +
  geom_point() +
  theme_few() +
  facet_wrap(~numAFC, nrow=3) +
  theme(axis.text.x = element_text(angle = 90, size=6, vjust = 0.5, hjust=1))

There’s a big range here. What about by age?

by_item <- cleaned_data %>%
  left_join(pid_by_age) %>%
  group_by(targetWord, numAFC, age_group) %>%
  summarize(prop_correct = mean(correct), num_participants =length(unique(pid))) %>%
  left_join(word1_aoa) %>%
  ungroup() %>%
  mutate(targetWord = fct_reorder(targetWord, -prop_correct))

Look at how proportion correct varies by target word, age, and numAFC

ggplot(data = by_item, aes(x=targetWord, y=prop_correct, col=numAFC, size=num_participants)) +
  geom_point(alpha=.8) +
  theme_few() +
  facet_grid(numAFC~age_group) +
  theme(axis.text.x = element_text(angle = 90, size=4, vjust = 0.5, hjust=1))

ggplot(data = by_item, aes(x=AoA_Est_Word1, y=prop_correct, col=numAFC, size=num_participants)) +
  geom_point(alpha=.8) +
  theme_few() +
  facet_grid(numAFC~age_group) +
  geom_smooth(span=20) +
  theme(axis.text.x = element_text(angle = 90, size=6, vjust = 0.5, hjust=1))

Reasonable amounts of variability across age

ggplot(data = by_item, aes(x=targetWord, y=prop_correct, col=numAFC, size=num_participants)) +
  geom_point(alpha=.8) +
  theme_few() +
  facet_grid(numAFC~age_group) +
  theme(axis.text.x = element_text(angle = 90, size=6, vjust = 0.5, hjust=1))

Construct a 2PL model where guess rate is specificed

Make a list for specifying guess rate in IRT instead of letting it be a paramters?

itemIdforIRTGuess <- itemIdforIRT %>%
  arrange(itemId) %>%
  mutate(guess_rate = case_when( numAFC==2 ~0.5 ,
                                 numAFC==3 ~0.33 ,
                                 numAFC==4 ~0.25 )) 

list.guessing = itemIdforIRTGuess$guess_rate

Run basic 2PL model with priors specified above and guessing rate per item

start.dim <- dim(d_mat_all)[2]
mm.string = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, 1, 0.5),
  PRIOR = (1-%d, d, norm, 0, 2), 
  LBOUND = (1-%d, a1, 0.0)' # why was this at .5?
  )
mm = mirt.model(sprintf(mm.string, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim))

Run a 2PL model

mod.vocab.2pl <- mirt(d_mat_all, model = mm, itemtype = '2PL', guess = list.guessing)

## 
Iteration: 1, Log-Lik: -51634.197, Max-Change: 5.38440
Iteration: 2, Log-Lik: -42961.571, Max-Change: 0.47868
Iteration: 3, Log-Lik: -42835.168, Max-Change: 0.19489
Iteration: 4, Log-Lik: -42782.723, Max-Change: 0.13694
Iteration: 5, Log-Lik: -42750.494, Max-Change: 0.10447
Iteration: 6, Log-Lik: -42730.012, Max-Change: 0.08535
Iteration: 7, Log-Lik: -42716.977, Max-Change: 0.06929
Iteration: 8, Log-Lik: -42708.700, Max-Change: 0.05589
Iteration: 9, Log-Lik: -42703.454, Max-Change: 0.04485
Iteration: 10, Log-Lik: -42700.134, Max-Change: 0.03587
Iteration: 11, Log-Lik: -42698.036, Max-Change: 0.02863
Iteration: 12, Log-Lik: -42696.710, Max-Change: 0.02281
Iteration: 13, Log-Lik: -42694.542, Max-Change: 0.00506
Iteration: 14, Log-Lik: -42694.506, Max-Change: 0.00376
Iteration: 15, Log-Lik: -42694.484, Max-Change: 0.00298
Iteration: 16, Log-Lik: -42694.447, Max-Change: 0.00059
Iteration: 17, Log-Lik: -42694.446, Max-Change: 0.00047
Iteration: 18, Log-Lik: -42694.446, Max-Change: 0.00036
Iteration: 19, Log-Lik: -42694.446, Max-Change: 0.00015
Iteration: 20, Log-Lik: -42694.446, Max-Change: 0.00010

Specify a 3PL model, allowing guess rate to vary

start.dim <- dim(d_mat_all)[2]
mm.string = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, 1, 0.5),
  PRIOR = (1-%d, d, norm, 0, 2), 
  LBOUND = (1-%d, g, .25),
  UBOUND = (1-%d, g, 1),
  LBOUND = (1-%d, a1, 0.0)' # why was this at .5?
  )
mm3pl = mirt.model(sprintf(mm.string, 
                        start.dim,
                        start.dim,
                        start.dim,
                        start.dim,
                        start.dim,
                        start.dim))

mod.vocab.3pl <- mirt(d_mat_all, model = mm3pl, itemtype = '3PL')

## 
Iteration: 1, Log-Lik: -53109.953, Max-Change: 3.92342
Iteration: 2, Log-Lik: -43634.487, Max-Change: 0.55905
Iteration: 3, Log-Lik: -43229.082, Max-Change: 0.47732
Iteration: 4, Log-Lik: -42968.010, Max-Change: 0.42427
Iteration: 5, Log-Lik: -42787.191, Max-Change: 0.40080
Iteration: 6, Log-Lik: -42664.554, Max-Change: 0.42137
Iteration: 7, Log-Lik: -42581.357, Max-Change: 0.36815
Iteration: 8, Log-Lik: -42527.257, Max-Change: 0.55529
Iteration: 9, Log-Lik: -42491.761, Max-Change: 0.39804
Iteration: 10, Log-Lik: -42469.541, Max-Change: 0.21810
Iteration: 11, Log-Lik: -42455.408, Max-Change: 0.08159
Iteration: 12, Log-Lik: -42446.480, Max-Change: 0.06901
Iteration: 13, Log-Lik: -42440.759, Max-Change: 0.07742
Iteration: 14, Log-Lik: -42437.086, Max-Change: 0.07718
Iteration: 15, Log-Lik: -42434.726, Max-Change: 0.11554
Iteration: 16, Log-Lik: -42430.732, Max-Change: 0.06554
Iteration: 17, Log-Lik: -42430.614, Max-Change: 0.08884
Iteration: 18, Log-Lik: -42430.578, Max-Change: 0.01894
Iteration: 19, Log-Lik: -42430.569, Max-Change: 0.01253
Iteration: 20, Log-Lik: -42430.554, Max-Change: 0.00532
Iteration: 21, Log-Lik: -42430.544, Max-Change: 0.00325
Iteration: 22, Log-Lik: -42430.531, Max-Change: 0.00089
Iteration: 23, Log-Lik: -42430.530, Max-Change: 0.00070
Iteration: 24, Log-Lik: -42430.529, Max-Change: 0.00057
Iteration: 25, Log-Lik: -42430.528, Max-Change: 0.00012
Iteration: 26, Log-Lik: -42430.528, Max-Change: 0.00016
Iteration: 27, Log-Lik: -42430.528, Max-Change: 0.00006

Tried 3PL models, 2PL had lower AIC/BIC,

anova(mod.vocab.2pl, mod.vocab.3pl)

##                    AIC    SABIC       HQ      BIC    logLik   logPost  df
## mod.vocab.2pl 84953.12 86945.75 86609.54 89481.09 -41678.56 -42694.44  NA
## mod.vocab.3pl 85326.07 88315.01 87810.70 92118.03 -41466.04 -42430.53 399

Get some coefficients for these models – choosing 2pl for now since lower AIC/BIC

co <- coef(mod.vocab.2pl,simplify=TRUE, IRTpars = FALSE) # Get coeeficients
coefs <- tibble::rownames_to_column(as.data.frame(co$items),'itemId')

Estimate reliabilty of this model

Marginal_rxx

mirt::marginal_rxx(mod.vocab.2pl)

## [1] 0.9693657

Empirical_rxx

fs_mod2pl <- fscores(mod.vocab.2pl, full.scores.SE=TRUE)
head(fs_mod2pl)

##                F      SE_F
## [1,] -0.53950006 0.4519553
## [2,] -0.65663442 0.4143779
## [3,] -0.06300122 0.4234623
## [4,]  0.23862979 0.4117100
## [5,] -1.25396162 0.3765784
## [6,]  0.54324603 0.4339549

empirical_rxx(fs_mod2pl)

##         F 
## 0.7772944

item_fits = itemfit(mod.vocab.2pl, fit_stats='infit')

It looks like we have really low infit statistics – I’m not sure what is going on here.

hist(item_fits$infit)

Outfits mostly look ok?

hist(item_fits$outfit)

plot(mod.vocab.2pl)

Trim off items with low slopes & rerunning 2pl

coefs_trimmed <- coefs %>%
  rename(slope = a1) %>%
  filter(slope>.8) %>%
  left_join(itemIdforIRT, by=c('itemId'))  %>%
  group_by(targetWord) %>%
  mutate(average_slope = mean(slope), both_items = length(unique(itemId)))

We got rid of 62 of the items with lowest slopes.

trimmed_items <- coefs %>%
  rename(slope = a1) %>%
  filter(slope<=.8) %>%
  left_join(itemIdforIRT, by=c('itemId'))  %>%
  arrange(targetWord)

Looks like these items come from 38 unqique words, which is nice.

kable(trimmed_items %>% arrange(slope))

itemId	slope	d	g	u	targetWord	numAFC
gondola_trolley	0.0411556	-0.6843378	0.50	1	gondola	2
gondola_3	0.2026996	-0.3793319	0.33	1	gondola	3
candlestick_4	0.2721611	-1.4983416	0.25	1	candlestick	4
gondola_4	0.3541357	-0.1809290	0.25	1	gondola	4
freezer_4	0.4236752	-0.4222056	0.25	1	freezer	4
kimono_turntable	0.4264104	-0.2401230	0.50	1	kimono	2
taillight_3	0.4522444	0.4043826	0.33	1	taillight	3
fox_3	0.4728896	2.2000945	0.33	1	fox	3
freezer_3	0.4966922	-0.6925563	0.33	1	freezer	3
grate_3	0.4990694	-1.9368611	0.33	1	grate	3
sauerkraut_dashboard	0.5094169	0.8279084	0.50	1	sauerkraut	2
sorbet_tamale	0.5197377	0.3846426	0.50	1	sorbet	2
teapot_3	0.5210931	0.7190383	0.33	1	teapot	3
tuxedo_4	0.5270226	-0.9728908	0.25	1	tuxedo	4
cymbal_kaleidoscope	0.5284330	-1.0628817	0.50	1	cymbal	2
tuxedo_3	0.5289096	-1.5052972	0.33	1	tuxedo	3
lollipop_doorbell	0.5370490	2.7611910	0.50	1	lollipop	2
headdress_3	0.5430806	0.0463051	0.33	1	headdress	3
teapot_4	0.5492847	1.0315018	0.25	1	teapot	4
pajamas_pants	0.5585700	-0.6762484	0.50	1	pajamas	2
turtle_frog	0.5624805	1.5306907	0.50	1	turtle	2
taillight_bumper	0.5654226	-0.1817327	0.50	1	taillight	2
grate_4	0.5865926	-1.2388688	0.25	1	grate	4
spatula_paddle	0.5866268	0.1135574	0.50	1	spatula	2
pajamas_cage	0.5955741	2.0764512	0.50	1	pajamas	2
stretcher_thermostat	0.6028205	1.4714070	0.50	1	stretcher	2
koala_3	0.6058408	1.7826383	0.33	1	koala	3
mandolin_4	0.6295693	-0.1363828	0.25	1	mandolin	4
artichoke_sunroof	0.6348734	1.0944322	0.50	1	artichoke	2
oil_3	0.6390161	1.0605928	0.33	1	oil	3
stew_4	0.6551823	-0.3443436	0.25	1	stew	4
pump_coat	0.6661403	2.1142438	0.50	1	pump	2
headdress_poster	0.6719631	0.8980228	0.50	1	headdress	2
teabag_4	0.6744869	1.1388222	0.25	1	teabag	4
sauerkraut_4	0.6750641	-1.5842832	0.25	1	sauerkraut	4
fox_wolf	0.6756263	1.7413645	0.50	1	fox	2
horn_bone	0.6779682	0.2876980	0.50	1	horn	2
grate_crate	0.6861602	-4.3360946	0.50	1	grate	2
prune_raisin	0.6931293	-0.8239175	0.50	1	prune	2
mandolin_gourd	0.6987987	0.1085061	0.50	1	mandolin	2
candlestick_3	0.7007068	-2.1468933	0.33	1	candlestick	3
bobsled_4	0.7040955	-3.3050588	0.25	1	bobsled	4
waterwheel_4	0.7056044	1.0552635	0.25	1	waterwheel	4
cymbal_4	0.7081902	-0.3082975	0.25	1	cymbal	4
gondola_clipboard	0.7194366	1.6118202	0.50	1	gondola	2
tapestry_3	0.7219517	0.4322558	0.33	1	tapestry	3
mulch_compost	0.7230480	-5.3206956	0.50	1	mulch	2
cymbal_mallet	0.7378264	1.2515661	0.50	1	cymbal	2
candlestick_candle	0.7434295	-4.7766115	0.50	1	candlestick	2
pajamas_4	0.7487710	0.2540781	0.25	1	pajamas	4
saddle_figurine	0.7570973	0.5445203	0.50	1	saddle	2
caramel_amber	0.7618761	0.9414672	0.50	1	caramel	2
elbow_4	0.7624635	1.5676900	0.25	1	elbow	4
fox_4	0.7649781	2.3108962	0.25	1	fox	4
telescope_3	0.7697011	2.7948978	0.33	1	telescope	3
waterwheel_target	0.7793944	1.8686696	0.50	1	waterwheel	2
hoe_dustpan	0.7813166	1.9539678	0.50	1	hoe	2
sloth_donkey	0.7837115	2.3409808	0.50	1	sloth	2
sauerkraut_3	0.7875766	-1.2534834	0.33	1	sauerkraut	3
hoe_peg	0.7905833	1.1677296	0.50	1	hoe	2
corset_4	0.7907552	-0.8262486	0.25	1	corset	4
koala_panda	0.7967828	2.0371289	0.50	1	koala	2

Construct a 2PL with trimmed items

Construct trimmed dataframe for refitting 2PL model

d_wide_trimmed<- all_data %>%
  filter(itemId %in% unique(coefs_trimmed$itemId)) %>%
  select(itemId, correct, pid) %>%
  ungroup() %>%
  arrange(itemId) %>%
  ungroup() %>%
  pivot_wider(names_from=itemId, values_from=correct, values_fn = ~mean(.x)) %>%
  ungroup()

d_mat_all_trimmed <- d_wide_trimmed %>%
  select(-pid) %>%
  data.frame %>%
  data.matrix 

rownames(d_mat_all_trimmed) <- d_wide_trimmed$pid

Set model parameters

start.dim <- dim(d_mat_all_trimmed)[2]
mm.trimmed.string = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, 1, 0.5),
  PRIOR = (1-%d, d, norm, 0, 2), 
  LBOUND = (1-%g, g, 0.25), 
  UBOUND = (1-%g, g, 1), 
  LBOUND = (1-%d, a1, 0.0)' # why was this at .5?
  )
mm.trimmed = mirt.model(sprintf(mm.trimmed.string, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim))

Run basic 2PL model with priors specified above and guessing rate per item

First get guessing list

itemIdforIRTTrimmed <- all_data %>%
  filter(itemId %in% unique(coefs_trimmed$itemId)) %>%
  distinct(itemId, targetWord,numAFC) %>%
  arrange(itemId) %>%
  mutate(guess_rate = case_when( numAFC==2 ~0.5 ,
                                 numAFC==3 ~0.33 ,
                                 numAFC==4 ~0.25 )) 

list.guessing.trimmed = itemIdforIRTTrimmed$guess_rate

mod.vocab.2pl.trimmed <- mirt(d_mat_all_trimmed, model = mm.trimmed, itemtype = '2PL', guess = list.guessing.trimmed)

## 
Iteration: 1, Log-Lik: -42083.613, Max-Change: 4.70053
Iteration: 2, Log-Lik: -35150.399, Max-Change: 0.51004
Iteration: 3, Log-Lik: -35002.793, Max-Change: 0.21931
Iteration: 4, Log-Lik: -34929.706, Max-Change: 0.16336
Iteration: 5, Log-Lik: -34884.414, Max-Change: 0.13077
Iteration: 6, Log-Lik: -34856.092, Max-Change: 0.10435
Iteration: 7, Log-Lik: -34838.407, Max-Change: 0.08309
Iteration: 8, Log-Lik: -34827.382, Max-Change: 0.06599
Iteration: 9, Log-Lik: -34820.517, Max-Change: 0.05231
Iteration: 10, Log-Lik: -34816.247, Max-Change: 0.04139
Iteration: 11, Log-Lik: -34813.593, Max-Change: 0.03271
Iteration: 12, Log-Lik: -34811.944, Max-Change: 0.02583
Iteration: 13, Log-Lik: -34809.371, Max-Change: 0.00580
Iteration: 14, Log-Lik: -34809.325, Max-Change: 0.00433
Iteration: 15, Log-Lik: -34809.296, Max-Change: 0.00340
Iteration: 16, Log-Lik: -34809.251, Max-Change: 0.00073
Iteration: 17, Log-Lik: -34809.250, Max-Change: 0.00056
Iteration: 18, Log-Lik: -34809.250, Max-Change: 0.00045
Iteration: 19, Log-Lik: -34809.249, Max-Change: 0.00020
Iteration: 20, Log-Lik: -34809.249, Max-Change: 0.00013
Iteration: 21, Log-Lik: -34809.249, Max-Change: 0.00011
Iteration: 22, Log-Lik: -34809.249, Max-Change: 0.00011
Iteration: 23, Log-Lik: -34809.249, Max-Change: 0.00004

co_trimmed_model <- coef(mod.vocab.2pl.trimmed,simplify=TRUE, IRTpars = FALSE) # Get coeeficients
coefs_trimmed_model <- tibble::rownames_to_column(as.data.frame(co_trimmed_model$items),'itemId')

# coefs_trimmed_model %>%
  # arrange(a1) %>%
  # kable()

 hard_but_good <- coefs_trimmed_model %>%
  filter(d>quantile(d,.75)) %>%
  arrange(a1) %>%
  kable()

marginal_rxx(mod.vocab.2pl)

## [1] 0.9693657

Do our items load onto one factor mostly?

item_check = as.data.frame(summary(mod.vocab.2pl.trimmed))

##                           F    h2
## acorn_3               0.578 0.334
## acorn_4               0.645 0.416
## acorn_coconut         0.609 0.371
## acorn_key             0.537 0.289
## aloe_3                0.601 0.362
## aloe_4                0.712 0.506
## aloe_bracket          0.643 0.414
## aloe_cactus           0.662 0.438
## antenna_3             0.595 0.354
## antenna_4             0.610 0.373
## antenna_pinecone      0.527 0.278
## antenna_stem          0.597 0.356
## artichoke_3           0.501 0.251
## artichoke_4           0.631 0.398
## artichoke_leek        0.572 0.328
## bamboo_3              0.637 0.406
## bamboo_4              0.663 0.439
## bamboo_lumber         0.620 0.384
## bamboo_yacht          0.432 0.187
## barrel_3              0.616 0.379
## barrel_4              0.583 0.340
## barrel_bucket         0.561 0.315
## barrel_shelf          0.500 0.250
## blender_3             0.604 0.365
## blender_4             0.617 0.380
## blender_blind         0.506 0.256
## blender_mailbox       0.533 0.284
## blower_3              0.655 0.429
## blower_4              0.456 0.208
## blower_buggy          0.447 0.200
## blower_burner         0.558 0.312
## bobsled_3             0.649 0.421
## bobsled_dip           0.499 0.249
## bobsled_sidecar       0.466 0.217
## bouquet_3             0.718 0.516
## bouquet_4             0.709 0.503
## bouquet_bandanna      0.711 0.505
## bouquet_centerpiece   0.733 0.537
## buffet_3              0.598 0.357
## buffet_4              0.662 0.438
## buffet_counter        0.652 0.425
## buffet_crib           0.514 0.264
## bulldozer_3           0.474 0.224
## bulldozer_4           0.634 0.403
## bulldozer_orange      0.539 0.291
## bulldozer_tractor     0.599 0.359
## cake_3                0.568 0.323
## cake_4                0.610 0.372
## cake_dessert          0.455 0.207
## candlestick_egg       0.537 0.289
## caramel_3             0.537 0.289
## caramel_4             0.648 0.420
## caramel_olive         0.604 0.365
## carousel_3            0.540 0.292
## carousel_4            0.658 0.432
## carousel_carriage     0.562 0.316
## carousel_dish         0.557 0.310
## carrot_3              0.613 0.376
## carrot_4              0.611 0.373
## carrot_phone          0.582 0.338
## cassette_3            0.585 0.342
## cassette_4            0.598 0.357
## cassette_book         0.525 0.276
## cassette_tape         0.526 0.277
## cheese_3              0.668 0.447
## cheese_4              0.708 0.501
## cheese_butter         0.742 0.551
## cloak_3               0.653 0.427
## cloak_4               0.718 0.516
## cloak_cinnamon        0.605 0.366
## cloak_hood            0.599 0.359
## clothespin_3          0.501 0.251
## clothespin_4          0.451 0.204
## clothespin_toothpick  0.588 0.346
## clothespin_volleyball 0.581 0.337
## coaster_3             0.634 0.402
## coaster_4             0.589 0.347
## coaster_card          0.532 0.283
## coaster_painting      0.499 0.249
## cork_3                0.633 0.401
## cork_4                0.710 0.503
## cork_jersey           0.581 0.338
## cork_skateboard       0.623 0.388
## cornbread_3           0.575 0.331
## cornbread_4           0.529 0.280
## cornbread_corn        0.630 0.397
## corset_3              0.479 0.230
## corset_harness        0.699 0.489
## corset_mantle         0.492 0.242
## cymbal_3              0.446 0.198
## dumpling_3            0.568 0.323
## dumpling_4            0.524 0.274
## dumpling_meatball     0.452 0.204
## dumpling_rocket       0.660 0.436
## elbow_3               0.551 0.303
## elbow_arm             0.505 0.255
## fan_3                 0.573 0.329
## fan_4                 0.586 0.343
## flan_3                0.543 0.295
## flan_4                0.503 0.253
## flan_amplifier        0.583 0.340
## flan_fuse             0.580 0.337
## foam_3                0.595 0.354
## foam_4                0.675 0.456
## foam_float            0.718 0.515
## foam_quilt            0.515 0.265
## footbath_3            0.555 0.308
## footbath_4            0.657 0.431
## freezer_cooler        0.500 0.250
## fruitcake_3           0.547 0.299
## fruitcake_4           0.647 0.419
## fruitcake_dough       0.435 0.189
## fruitcake_fudge       0.572 0.327
## grate_reel            0.516 0.267
## gutter_3              0.514 0.264
## gutter_4              0.579 0.336
## gutter_almond         0.577 0.332
## gutter_filter         0.552 0.305
## hamster_3             0.673 0.453
## hamster_4             0.633 0.400
## hamster_rabbit        0.603 0.363
## hamster_tadpole       0.620 0.385
## headdress_4           0.452 0.205
## headdress_crown       0.417 0.174
## hedgehog_3            0.621 0.386
## hedgehog_4            0.648 0.420
## hedgehog_iguana       0.490 0.240
## hedgehog_owl          0.650 0.423
## hoe_3                 0.475 0.225
## hoe_4                 0.552 0.304
## hopscotch_3           0.628 0.394
## hopscotch_4           0.664 0.440
## hopscotch_chalk       0.619 0.383
## hopscotch_funnel      0.654 0.428
## horn_3                0.453 0.205
## horn_4                0.443 0.196
## horn_chin             0.460 0.211
## kimono_3              0.672 0.452
## kimono_4              0.579 0.335
## kimono_cardigan       0.633 0.401
## koala_4               0.608 0.370
## latch_3               0.618 0.381
## latch_4               0.629 0.395
## latch_lock            0.551 0.304
## latch_microwave       0.568 0.323
## locker_3              0.550 0.303
## locker_4              0.692 0.479
## locker_basket         0.615 0.379
## locker_cabinet        0.498 0.248
## lollipop_3            0.556 0.309
## lollipop_4            0.573 0.328
## lollipop_candy        0.614 0.377
## mandolin_3            0.499 0.249
## mandolin_mast         0.515 0.265
## map_4                 0.614 0.377
## map_marker            0.485 0.235
## map_ruby              0.476 0.227
## marshmallow_3         0.594 0.353
## marshmallow_4         0.539 0.291
## mulch_3               0.621 0.385
## mulch_4               0.607 0.368
## mulch_clarinet        0.497 0.247
## net_3                 0.687 0.472
## net_4                 0.732 0.536
## net_domino            0.620 0.384
## net_tee               0.713 0.509
## oatmeal_3             0.500 0.250
## oatmeal_4             0.543 0.295
## oatmeal_cereal        0.486 0.236
## oil_4                 0.428 0.183
## oil_gel               0.538 0.290
## oil_grain             0.646 0.418
## omelet_3              0.688 0.473
## omelet_4              0.663 0.439
## omelet_clay           0.632 0.400
## omelet_pancake        0.525 0.275
## otter_3               0.609 0.371
## otter_4               0.500 0.250
## otter_beaver          0.669 0.448
## otter_goose           0.425 0.181
## pajamas_3             0.595 0.354
## parsley_3             0.627 0.393
## parsley_4             0.665 0.442
## parsley_basil         0.498 0.248
## parsley_tiara         0.571 0.326
## pie_3                 0.545 0.297
## pie_4                 0.591 0.349
## pistachio_3           0.545 0.297
## pistachio_4           0.673 0.453
## pistachio_avocado     0.479 0.230
## pistachio_taco        0.489 0.239
## pitcher_3             0.720 0.518
## pitcher_4             0.790 0.624
## pitcher_batter        0.735 0.541
## pitcher_tumbleweed    0.574 0.329
## potato_3              0.547 0.299
## potato_4              0.608 0.369
## potato_pot            0.642 0.412
## prism_3               0.699 0.489
## prism_4               0.722 0.522
## prism_radar           0.687 0.473
## prism_subway          0.502 0.252
## prune_3               0.517 0.268
## prune_4               0.612 0.374
## prune_headrest        0.611 0.374
## puddle_3              0.609 0.371
## puddle_4              0.661 0.437
## puddle_baseball       0.581 0.337
## puddle_bubble         0.507 0.257
## pump_3                0.521 0.271
## pump_4                0.618 0.382
## pump_plug             0.606 0.368
## rice_3                0.604 0.365
## rice_4                0.540 0.292
## rice_dice             0.555 0.308
## saddle_3              0.559 0.312
## saddle_4              0.659 0.434
## saddle_handle         0.618 0.382
## sandbag_3             0.422 0.178
## sandbag_4             0.471 0.222
## sandbag_sandpaper     0.479 0.229
## sandbag_valve         0.566 0.320
## sauerkraut_potpourri  0.500 0.250
## scaffolding_3         0.537 0.288
## scaffolding_4         0.633 0.400
## scaffolding_satellite 0.649 0.421
## scaffolding_veil      0.537 0.288
## scoop_3               0.506 0.256
## scoop_4               0.544 0.296
## scoop_sauce           0.505 0.255
## scoop_statue          0.467 0.218
## seagull_3             0.617 0.381
## seagull_4             0.501 0.251
## seagull_pigeon        0.635 0.404
## ship_3                0.577 0.333
## ship_4                0.557 0.311
## ship_boat             0.635 0.403
## shower_3              0.635 0.404
## shower_4              0.650 0.423
## shower_soap           0.511 0.261
## shower_toe            0.553 0.306
## silverware_3          0.594 0.353
## silverware_4          0.671 0.450
## silverware_garlic     0.554 0.307
## silverware_trophy     0.633 0.401
## sink_3                0.643 0.414
## sink_4                0.575 0.331
## sink_stair            0.657 0.432
## sink_toilet           0.603 0.364
## ski_3                 0.580 0.337
## ski_4                 0.657 0.432
## ski_ice               0.613 0.376
## ski_suitcase          0.620 0.384
## sloth_3               0.469 0.220
## sloth_4               0.443 0.196
## sloth_clam            0.495 0.245
## snail_3               0.606 0.367
## snail_4               0.609 0.371
## snail_worm            0.614 0.377
## sorbet_3              0.496 0.246
## sorbet_4              0.542 0.294
## sorbet_palette        0.497 0.247
## spatula_3             0.514 0.264
## spatula_4             0.433 0.188
## spatula_lava          0.593 0.352
## sprinkler_3           0.576 0.332
## sprinkler_4           0.637 0.406
## sprinkler_sparkler    0.443 0.196
## squash_3              0.556 0.309
## squash_4              0.470 0.221
## squash_branch         0.532 0.283
## squash_pumpkin        0.618 0.382
## squirrel_3            0.595 0.354
## squirrel_4            0.499 0.249
## squirrel_eagle        0.546 0.298
## squirrel_monkey       0.471 0.222
## stew_3                0.609 0.371
## stew_mixer            0.564 0.318
## stew_soup             0.642 0.412
## stretcher_3           0.700 0.491
## stretcher_4           0.649 0.421
## stretcher_drill       0.649 0.421
## stump_3               0.665 0.443
## stump_4               0.593 0.352
## stump_bookshelf       0.721 0.520
## stump_log             0.699 0.488
## sunflower_4           0.555 0.308
## swordfish_3           0.585 0.342
## swordfish_4           0.625 0.391
## swordfish_catfish     0.746 0.557
## swordfish_leopard     0.559 0.312
## taillight_4           0.528 0.279
## taillight_luggage     0.674 0.455
## tapestry_4            0.594 0.352
## tapestry_backdrop     0.610 0.372
## tapestry_leggings     0.512 0.262
## teabag_3              0.523 0.274
## teabag_fingerprint    0.605 0.366
## teabag_tea            0.530 0.281
## teapot_brake          0.603 0.363
## teapot_teacup         0.534 0.285
## telescope_4           0.542 0.294
## telescope_tripod      0.450 0.203
## thermos_3             0.667 0.445
## thermos_4             0.588 0.345
## thermos_firewood      0.659 0.434
## thermos_oilcan        0.664 0.441
## treasure_3            0.594 0.353
## treasure_4            0.446 0.199
## treasure_gift         0.483 0.233
## trumpet_3             0.509 0.259
## trumpet_4             0.673 0.453
## trumpet_flute         0.557 0.310
## trumpet_violin        0.454 0.206
## tulip_3               0.507 0.257
## tulip_4               0.535 0.287
## tulip_glove           0.666 0.444
## tulip_rose            0.569 0.323
## turbine_3             0.610 0.373
## turbine_4             0.568 0.323
## turbine_generator     0.646 0.417
## turbine_tab           0.684 0.468
## turkey_3              0.478 0.228
## turkey_4              0.617 0.381
## turkey_goat           0.693 0.481
## turtle_3              0.524 0.275
## turtle_4              0.476 0.227
## turtle_horse          0.642 0.413
## tuxedo_stroller       0.521 0.271
## tuxedo_suit           0.521 0.271
## typewriter_3          0.523 0.273
## typewriter_4          0.697 0.486
## typewriter_printer    0.510 0.260
## typewriter_sunglasses 0.636 0.404
## watermelon_3          0.492 0.242
## waterwheel_3          0.580 0.337
## waterwheel_pinwheel   0.640 0.410
## 
## SS loadings:  115.431 
## Proportion Var:  0.343 
## 
## Factor correlations: 
## 
##   F
## F 1

item_check_2 = rownames_to_column(item_check)

item_info <- item_check_2 %>%
  as_tibble() %>%
  rename(itemId = rowname) %>%
  mutate(targetWord = str_split_fixed(itemId, '_',2)[,1]) %>%
    mutate(numAFC = str_split_fixed(itemId, '_',2)[,2]) %>%
  left_join(word1_aoa) %>%
  group_by(targetWord) %>%
  mutate(avg_h2 = mean(h2)) %>%
  ungroup() %>%
  mutate(targetWordOrdered = fct_reorder(targetWord, avg_h2))

Here, each dot is a trial type, and the y-axis the factor loading for that trial – looks like we have a pretty even spead from .4 to .8

ggplot(data = item_info, aes(x=targetWordOrdered, y=F, col=AoA_Est_Word1)) +
  theme_few(base_size = 12) +
  geom_point()  +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size=4), aspect.ratio = .4)

  # facet_wrap(~numAFC, nrow=1)

Now do some pre-processing to get individual theta estiamtes

Create sandbox participant who has seen all trials

list.items <- itemIdforIRTTrimmed$itemId
df.sandbox.participant <- 
  tibble(pid = "sandbox", itemId = list.items, correct = 1)

Create trial ID structure for matrix

duplicate_ids <- school_data_multi_afc_cleaned %>%
  group_by(pid) %>%
  summarize(duplicates = length(unique(runId))) %>%
  filter(duplicates>1)

ceiling_kids_multiafc <- school_data_multi_afc_cleaned %>%
  group_by(pid) %>%
  summarize(pc = mean(correct)) %>%
  filter(pc>.95)

df.rocketship.roar.trials.clean <- school_data_multi_afc_cleaned %>% 
  filter(pid %in% completed_twice$pid) %>%
  filter(!pid %in% ceiling_kids_multiafc$pid) %>%
  filter(!pid %in% duplicate_ids$pid) %>%
  filter(studyId %in%  c("school-multiAFC"), itemType == "test")  %>%
  left_join(itemIdforIRT, by=c('targetWord','numAFC')) %>%
  filter(itemId %in% list.items) %>%
  dplyr :: distinct(pid, runId, studyId, start_time, numAFC, targetWord, itemId, correct, rt)

df.rocketship.2afc.roar.trials.clean <- school_data_2afc_cleaned %>% 
  mutate(itemId = paste(word1, word2, sep = "_")) %>%
  filter(itemId %in% list.items) %>%
  dplyr :: distinct(pid, runId, studyId, start_time, numAFC, word1, word2, itemId, correct, rt)

There were 155 trials in the 2AFC version.

Make each of the data structures for getting thetas

df.multiAFC.resp <- df.rocketship.roar.trials.clean %>% 
  select(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_fn = mean, values_from = correct, values_fill = NA)

df.2AFC.resp <- df.rocketship.roar.trials.clean %>% 
  filter(numAFC == 2) %>% 
  distinct(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_fn = mean, values_from = correct, values_fill = NA)

df.3AFC.resp <- df.rocketship.roar.trials.clean %>% 
  filter(numAFC == 3) %>% 
  distinct(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_fn = mean, values_from = correct, values_fill = NA)

df.4AFC.resp <- df.rocketship.roar.trials.clean %>% 
  filter(numAFC == 4) %>% 
  distinct(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_fn = mean, values_from = correct, values_fill = NA)

Make the data structures for getting thetas from 2afc

duplicates_first_admin <- df.rocketship.2afc.roar.trials.clean %>%
  group_by(pid) %>%
  summarize(duplicates = length(unique(runId))>1) %>%
  filter(duplicates==TRUE)

Eliminate kids who don’t have enough items

kids_not_enough_items <- df.rocketship.2afc.roar.trials.clean %>%
  filter(!pid %in% duplicates_first_admin$pid) %>%
  distinct(pid, itemId, correct)  %>%
  filter(itemId %in% list.items) %>%
  group_by(pid) %>%
  summarize(items_per_kid = length(unique(itemId))) %>%
  filter(items_per_kid<10)

Or kids who end up being at ceiling in this subset

kids_ceiling <- df.rocketship.2afc.roar.trials.clean %>%
  filter(!pid %in% duplicates_first_admin$pid) %>%
  distinct(pid, itemId, correct)  %>%
  filter(itemId %in% list.items) %>%
  group_by(pid) %>%
  summarize(pc = mean(correct)) %>%
  filter(pc>.95)

df.2AFC.first_admin <- df.rocketship.2afc.roar.trials.clean %>% 
  filter(!pid %in% kids_ceiling$pid) %>%
  filter(!pid %in% duplicates_first_admin$pid) %>%
  filter(!pid %in% kids_not_enough_items$pid) %>%
  filter(pid %in% unique(df.rocketship.roar.trials.clean$pid)) %>%
  distinct(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_from = correct, values_fn = mean, values_fill = NA)

Get theta score based on these reseponse pattenrs

getTheta <- function(df){
  thetas <- fscores(mod.vocab.2pl.trimmed, method = "ML", response.pattern = df %>% select(-pid), max_theta = 8, min_theta = -8, theta_lim = c(-8, 8))
  return (data.frame(thetas)$F)
}

Overall and based on 234afc separately

df.thetas.students <- df.multiAFC.resp %>% 
    dplyr :: select(pid) %>% 
    add_column(theta.multiAFC = getTheta(df.multiAFC.resp), 
               theta.2AFC = getTheta(df.2AFC.resp),
               theta.3AFC = getTheta(df.3AFC.resp),
               theta.4AFC = getTheta(df.4AFC.resp)
               )

sum(df.multiAFC.resp$pid==df.2AFC.resp$pid)

## [1] 850

Overall and based on 234afc separately

df.thetas.students.first_admin <- df.2AFC.first_admin %>% 
    dplyr :: select(pid) %>% 
    add_column(theta.2AFC = getTheta(df.2AFC.first_admin))

Now get thetas for 2AFC kids with same model

dfs_all <- df.thetas.students.first_admin %>%
  rename(theta.2AFC.initial = theta.2AFC) %>%
  right_join(df.thetas.students, by=c('pid')) %>%
  left_join(pid_by_age) %>%
  mutate(age_group = as.numeric(age_group)) %>%
  filter(!is.na(age_group))

Students who did both multiAFC and 2AFC assessments for each age group

ggplot(data=dfs_all, aes(x=theta.2AFC.initial, y=theta.multiAFC, col=age_group)) +
  geom_point(alpha=.6) +
  stat_cor(cor.coef.name = 'r', aes(label = ..r.label..))  +
  geom_smooth(span=20) +
  facet_grid(~age_group) +
  theme(legend.position='none')

ggplot(data=dfs_all,  aes(x=theta.2AFC.initial, y=theta.multiAFC, col=age_group)) +
  geom_point(alpha=.6) +
  stat_cor(cor.coef.name = 'r', aes(label = ..r.label..))  +
  geom_smooth(span=20) +
  theme(legend.position='none')

Correlation with age isn’t thaaat bad

ggplot(data=dfs_all, aes(x=age_numeric, y=theta.multiAFC, col=age_group)) +
  geom_point(alpha=.6) +
  stat_cor(cor.coef.name = 'r', aes(label = ..r.label..))  +
  geom_smooth(span=20) +
  # facet_grid(~age_group) +
  theme(legend.position='none') +
  ggtitle('Theta by age')

Construct model & estimate reliability with only this subset

Preprocess data structures

Kick out items and kids at ceiling in this subset

ceiling <- all_data %>%
  filter(pid %in% dfs_all$pid) %>%
  group_by(itemId) %>%
  summarise(pc = mean(correct)) %>%
  filter(pc==1)

ceiling_kids <- all_data %>%
  filter(pid %in% dfs_all$pid) %>%
  group_by(pid) %>%
  summarise(pc = mean(correct)) %>%
  filter(pc>.9)

# runs_by_pid<- all_data %>%
#   filter(pid %in% dfs_all$pid) %>%
#   filter(!pid %in% ceiling_kids$pid) %>%
#   filter(itemId %in% unique(coefs_trimmed$itemId)) %>%
#   filter(!itemId %in% unique(ceiling$itemId)) %>%
#   mutate(pid_unique = paste0(pid, '_', administration)) %>%
#   group_by(pid_unique) %>%
#   summarize(num_runs = length(unique(runId)))

d_wide_school<- all_data %>%
  filter(pid %in% dfs_all$pid) %>%
  filter(!pid %in% ceiling_kids$pid) %>%
  filter(itemId %in% unique(coefs_trimmed$itemId)) %>%
  filter(!itemId %in% unique(ceiling$itemId)) %>%
  mutate(pid_unique = paste0(pid, '_', administration)) %>%
  select(itemId, correct, pid_unique) %>%
  ungroup() %>%
  arrange(itemId) %>%
  ungroup() %>%
  pivot_wider(names_from=itemId, values_from=correct, values_fn = ~mean(.x)) %>%
  ungroup()

d_mat_school <- d_wide_school %>%
  select(-pid_unique) %>%
  data.frame %>%
  data.matrix 

rownames(d_mat_school) <- d_wide_school$pid_unique

assert_that(sum(d_wide_school$pid_unique!=rownames(d_mat_school))==0)

## [1] TRUE

Make guess list

items_d_mat_school = colnames(d_mat_school)

itemIdforIRTGuess <- itemIdforIRT %>%
  filter(itemId %in% items_d_mat_school) %>%
  arrange(itemId) %>%
  mutate(guess_rate = case_when( numAFC==2 ~0.5 ,
                                 numAFC==3 ~0.33 ,
                                 numAFC==4 ~0.25 )) 

list.guessing = itemIdforIRTGuess$guess_rate

Construct 2PL model with rocketship kids only

start.dim <- dim(d_mat_school)[2]
mm.string = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, 1, 0.5),
  PRIOR = (1-%d, d, norm, 0, 2), 
  LBOUND = (1-%d, a1, 0.0)' # why was this at .5?
  )
mm = mirt.model(sprintf(mm.string, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim))

mod.vocab.2pl.rocketship <- mirt(d_mat_school, model = mm, itemtype = '2PL', guess=list.guessing)

## 
Iteration: 1, Log-Lik: -22889.330, Max-Change: 4.46130
Iteration: 2, Log-Lik: -18815.557, Max-Change: 0.46271
Iteration: 3, Log-Lik: -18746.126, Max-Change: 0.16113
Iteration: 4, Log-Lik: -18726.956, Max-Change: 0.10280
Iteration: 5, Log-Lik: -18718.841, Max-Change: 0.07003
Iteration: 6, Log-Lik: -18715.118, Max-Change: 0.04766
Iteration: 7, Log-Lik: -18713.377, Max-Change: 0.03260
Iteration: 8, Log-Lik: -18712.560, Max-Change: 0.02239
Iteration: 9, Log-Lik: -18712.175, Max-Change: 0.01535
Iteration: 10, Log-Lik: -18711.868, Max-Change: 0.00491
Iteration: 11, Log-Lik: -18711.849, Max-Change: 0.00336
Iteration: 12, Log-Lik: -18711.841, Max-Change: 0.00232
Iteration: 13, Log-Lik: -18711.834, Max-Change: 0.00071
Iteration: 14, Log-Lik: -18711.833, Max-Change: 0.00050
Iteration: 15, Log-Lik: -18711.833, Max-Change: 0.00035
Iteration: 16, Log-Lik: -18711.833, Max-Change: 0.00012
Iteration: 17, Log-Lik: -18711.833, Max-Change: 0.00012
Iteration: 18, Log-Lik: -18711.833, Max-Change: 0.00010

Marginal and empirical reliability in this sample

Different ways of estimating reliability

marginal_rxx(mod.vocab.2pl.rocketship)

## [1] 0.9569501

fs <- fscores(mod.vocab.2pl.rocketship, full.scores.SE=TRUE)
head(fs)

##               F      SE_F
## [1,] -0.0633073 0.4993017
## [2,]  0.5219550 0.5768861
## [3,] -1.6932029 0.4699129
## [4,] -0.1377871 0.4979300
## [5,]  0.6879826 0.5387015
## [6,] -0.3753503 0.4952175

empirical_rxx(fs)

##         F 
## 0.6275197

Check whcih items are left

final_items_left <- itemIdforIRTTrimmed %>%
  filter(numAFC==4) %>%
  left_join(word1_aoa, by=c('targetWord')) %>%
  arrange(AoA_Est_Word1) 

kable(final_items_left)

itemId	targetWord	numAFC	guess_rate	AoA_Est_Word1
carrot_4	carrot	4	0.25	2.740000
cake_4	cake	4	0.25	3.260000
pie_4	pie	4	0.25	3.670000
rice_4	rice	4	0.25	3.720000
marshmallow_4	marshmallow	4	0.25	3.800000
lollipop_4	lollipop	4	0.25	3.890000
turkey_4	turkey	4	0.25	3.950000
turtle_4	turtle	4	0.25	4.170000
cheese_4	cheese	4	0.25	4.330000
hamster_4	hamster	4	0.25	4.370000
squirrel_4	squirrel	4	0.25	4.440000
sink_4	sink	4	0.25	4.470000
oatmeal_4	oatmeal	4	0.25	4.500000
shower_4	shower	4	0.25	4.720000
horn_4	horn	4	0.25	4.840000
potato_4	potato	4	0.25	4.840000
scoop_4	scoop	4	0.25	5.140000
ship_4	ship	4	0.25	5.330000
puddle_4	puddle	4	0.25	5.370000
seagull_4	seagull	4	0.25	5.420000
otter_4	otter	4	0.25	5.470000
sprinkler_4	sprinkler	4	0.25	5.530000
map_4	map	4	0.25	5.600000
bulldozer_4	bulldozer	4	0.25	5.610000
fan_4	fan	4	0.25	5.630000
snail_4	snail	4	0.25	5.790000
hopscotch_4	hopscotch	4	0.25	5.800000
silverware_4	silverware	4	0.25	5.890000
acorn_4	acorn	4	0.25	5.950000
koala_4	koala	4	0.25	6.000000
sunflower_4	sunflower	4	0.25	6.000000
pump_4	pump	4	0.25	6.060000
treasure_4	treasure	4	0.25	6.060000
foam_4	foam	4	0.25	6.150000
trumpet_4	trumpet	4	0.25	6.280000
cassette_4	cassette	4	0.25	6.370000
cornbread_4	cornbread	4	0.25	6.400000
pitcher_4	pitcher	4	0.25	6.420000
saddle_4	saddle	4	0.25	6.420000
omelet_4	omelet	4	0.25	6.500000
caramel_4	caramel	4	0.25	6.580000
carousel_4	carousel	4	0.25	6.680000
ski_4	ski	4	0.25	6.680000
typewriter_4	typewriter	4	0.25	6.740000
blender_4	blender	4	0.25	6.840000
squash_4	squash	4	0.25	6.940000
cloak_4	cloak	4	0.25	6.950000
telescope_4	telescope	4	0.25	6.950000
net_4	net	4	0.25	7.000000
antenna_4	antenna	4	0.25	7.050000
tulip_4	tulip	4	0.25	7.148876
spatula_4	spatula	4	0.25	7.320000
oil_4	oil	4	0.25	7.330000
buffet_4	buffet	4	0.25	7.580000
stump_4	stump	4	0.25	7.580000
barrel_4	barrel	4	0.25	7.720000
gutter_4	gutter	4	0.25	7.790000
clothespin_4	clothespin	4	0.25	7.830000
sandbag_4	sandbag	4	0.25	7.830000
prune_4	prune	4	0.25	7.850000
coaster_4	coaster	4	0.25	7.890000
latch_4	latch	4	0.25	8.000000
locker_4	locker	4	0.25	8.220000
fruitcake_4	fruitcake	4	0.25	8.280000
thermos_4	thermos	4	0.25	8.290000
blower_4	blower	4	0.25	8.320000
sloth_4	sloth	4	0.25	8.370000
dumpling_4	dumpling	4	0.25	8.500000
footbath_4	footbath	4	0.25	8.530000
hedgehog_4	hedgehog	4	0.25	8.630000
bouquet_4	bouquet	4	0.25	8.720000
hoe_4	hoe	4	0.25	8.720000
pistachio_4	pistachio	4	0.25	8.740000
aloe_4	aloe	4	0.25	8.950000
parsley_4	parsley	4	0.25	8.950000
cork_4	cork	4	0.25	9.050000
headdress_4	headdress	4	0.25	9.060000
mulch_4	mulch	4	0.25	9.220000
bamboo_4	bamboo	4	0.25	9.280000
swordfish_4	swordfish	4	0.25	9.330000
taillight_4	taillight	4	0.25	9.330000
stretcher_4	stretcher	4	0.25	9.390000
prism_4	prism	4	0.25	9.781155
artichoke_4	artichoke	4	0.25	10.000000
tapestry_4	tapestry	4	0.25	10.950000
turbine_4	turbine	4	0.25	11.060000
scaffolding_4	scaffolding	4	0.25	11.220000
kimono_4	kimono	4	0.25	11.370000
sorbet_4	sorbet	4	0.25	12.000000
flan_4	flan	4	0.25	12.650000

```

Reliability Analyses July 2023

Bria Long

2023-05-19