Setup

library(tidyverse)
library(lubridate)
library(ggthemes)
library(langcog)
library(mirt)
library(knitr)
library("tidyverse") # for data wrangling 
library(dplyr)
#anya
# prolific_data <- read_csv("../../data/prolific_data/20230427_vocab_prolificMultiAFC_deidentified.csv") %>%
# as_tibble()

# school_data <- read_csv("~/Documents/Data/roar_trial_data/school-vocab-multiAFC.csv") %>% mutate(schoolId = "school")

# cdm_data <- read_csv("~/Documents/Data/roar_trial_data/cdm-vocab-multiAFC.csv") %>% mutate(schoolId = "cdm")
# Bria
prolific_data <- read_csv(here::here('data/data_multiafc/multi-afc-april28.csv')) %>%
  as_tibble() %>%
  filter(!is.na(itemType)) %>%
  mutate(schoolId = "prolific") %>%
  mutate(age_numeric = 30) 
  # mutate(age = 'adult')
# 
school_data <- read_csv(here::here('data/data_multiafc/school-afc-trials.csv')) %>%
    as_tibble %>%
    mutate(schoolId = "school")

school_meta <- read_csv(here::here('data/rocketship_data/cs01-metadata-age.csv')) %>%
    as_tibble() %>%
    mutate(age_numeric = as.numeric(age))  %>%
    select(pid, grade, age_numeric)
# mismatched_columns = colnames(prolific_data)[!colnames(prolific_data) %in% colnames(school_data)]
# Use joins to get rid of mismatch columns
pilot_data <- school_data %>%
  left_join(school_meta) %>%
  full_join(prolific_data) %>% 
  # full_join(cdm_data) %>% 
  filter(!is.na(pid))

Load data and impose basic filtering requirements

test_data <- pilot_data %>%
  filter(!block %in% c('practice'), completed == TRUE) %>%
  filter(rt>200) %>%
  filter(rt<5000) %>%
  mutate(item_pair = paste0(targetWord,'_',numAFC)) %>%
  mutate(sub_id = pid, numAFC = as.character(numAFC))

We have 418 participants.

sanity <- test_data %>%
  group_by(pid, numAFC, itemType) %>%
  summarize(num_trials = length(trialId), num_test = sum(task=='test_response'), num_prac = sum(task=='practice_response'), date_took = timeFinished[1], completed_or_not = sum(completed), unique_words = length(unique(targetWord)))
low_performers <- test_data %>%
  filter(itemType=='catch') %>%
  group_by(pid) %>%
  summarize(prop_catch = mean(correct)) %>%
  filter(prop_catch<1)

We filtered out 46 for missing catch trials.

test_data <- test_data %>%
  filter(!pid %in% low_performers$pid) %>% 
  filter(itemType != "catch")

Basic descriptives before IRT models

cor_by_afc <- test_data %>%
  group_by(pid, numAFC, schoolId) %>%
  summarize(prop_correct = mean(correct))

Accuracy by AFC at participant level

ggplot(data=cor_by_afc, aes(x=numAFC, y=prop_correct, color=numAFC)) +
  facet_wrap(~schoolId) +
  geom_point(alpha=.6) +
  geom_line(aes(group=pid), alpha=.2) 

Accuracy by AFC at word level

by_word <- test_data %>%
  group_by(targetWord, numAFC, item_pair, schoolId) %>%
  summarize(prop_correct = mean(correct), participants = length(unique(pid)), mean_rt = mean(rt))
ggplot(data=by_word, aes(x=numAFC, y=prop_correct, color=numAFC)) +
  geom_point(alpha=.6) +
  facet_wrap(~schoolId) +
  geom_text(aes(label = as.character(targetWord)), hjust=0.4, vjust=0, size = 2) + 
  geom_line(aes(group=targetWord), alpha=.2)

RT by AFC at word level

ggplot(data=by_word, aes(x=numAFC, y=mean_rt, color=numAFC)) +
  geom_point(alpha=.6) +
  facet_wrap(~schoolId) +
  geom_line(aes(group=targetWord), alpha=.2)

Construct data for IRT model

Filter out items that are at ceiling – all participants got correct.

ceiling = test_data %>%
  group_by(item_pair, targetWord, numAFC) %>%
  summarize(prop_correct = mean(correct)) %>%
  filter(prop_correct == 1)

which ones are at ceiling?

ceiling %>%
  group_by(numAFC) %>%
  summarize(num_item = length(item_pair))
## # A tibble: 3 × 2
##   numAFC num_item
##   <chr>     <int>
## 1 2             3
## 2 3             1
## 3 4             1

Construct data structures

d_wide_all.2<- test_data %>% 
  filter(itemType == "test") %>%  #make sure only test items
  filter(!targetWord %in% ceiling$targetWord) %>%
  select(sub_id, runId, item_pair, correct) %>% 
  mutate(correct = as.numeric(correct)) %>% 
  pivot_wider(names_from=item_pair, values_from=correct) 
  
d_mat_all.2 <- as.matrix(d_wide_all.2 %>%
  select(-c(sub_id, runId)))

Make a list for specifying guess rate in IRT

#colnames(d_mat_all)
list.items <- colnames(d_mat_all.2)
list.guessing <- NULL
for (i in seq(1 : length(list.items))) {
  x <- list.items[i]
  num <- substr(x, nchar(x), nchar(x))
  if (num == "2") {
    list.guessing <- c(list.guessing, 0.5)
  } else if (num == "3") {
    list.guessing <- c(list.guessing, 0.33)
  } else {
    list.guessing <- c(list.guessing, 0.25)
  }
}
# by word and afc
df.item.summary <- test_data %>% 
  filter(itemType == "test") %>% 
  group_by(targetWord, numAFC, item_pair) %>% 
  summarise(pcor = mean(correct), n = n()) %>% 
  filter(pcor != 1)

# by just word, not afc
df.item.summary.2 <- test_data %>% 
  filter(itemType == "test") %>% 
  group_by(targetWord) %>% 
  summarise(pcor = mean(correct), n = n()) %>% 
  filter(pcor != 1)

Construct basic IRT model – 2PL

start.dim <- dim(d_mat_all.2)[2]
mm.string = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, 1, 0.5),
  PRIOR = (1-%d, d, norm, 0, 2), 
  LBOUND = (1-%d, a1, 0.5)'
  )
mm = mirt.model(sprintf(mm.string, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim))

Run basic 2PL model with priors specified above and guessing rate per item

m <- mirt(d_mat_all.2,model = mm, itemtype = '2PL', guess = (as.vector(list.guessing)))
## 
Iteration: 1, Log-Lik: -8358.871, Max-Change: 3.54177
Iteration: 2, Log-Lik: -7044.339, Max-Change: 0.40877
Iteration: 3, Log-Lik: -6969.672, Max-Change: 0.29793
Iteration: 4, Log-Lik: -6923.494, Max-Change: 0.23323
Iteration: 5, Log-Lik: -6894.026, Max-Change: 0.18725
Iteration: 6, Log-Lik: -6875.184, Max-Change: 0.15078
Iteration: 7, Log-Lik: -6863.128, Max-Change: 0.12116
Iteration: 8, Log-Lik: -6855.404, Max-Change: 0.09738
Iteration: 9, Log-Lik: -6850.454, Max-Change: 0.07810
Iteration: 10, Log-Lik: -6847.283, Max-Change: 0.06259
Iteration: 11, Log-Lik: -6845.252, Max-Change: 0.05013
Iteration: 12, Log-Lik: -6843.952, Max-Change: 0.04014
Iteration: 13, Log-Lik: -6841.731, Max-Change: 0.00830
Iteration: 14, Log-Lik: -6841.698, Max-Change: 0.00642
Iteration: 15, Log-Lik: -6841.676, Max-Change: 0.00517
Iteration: 16, Log-Lik: -6841.640, Max-Change: 0.00100
Iteration: 17, Log-Lik: -6841.639, Max-Change: 0.00081
Iteration: 18, Log-Lik: -6841.639, Max-Change: 0.00064
Iteration: 19, Log-Lik: -6841.638, Max-Change: 0.00026
Iteration: 20, Log-Lik: -6841.638, Max-Change: 0.00011
Iteration: 21, Log-Lik: -6841.638, Max-Change: 0.00008
params <- data.frame(coef(m, IRTpars = FALSE, simplify = TRUE))  # slope/intercept parameters with prior dist.
params_irt_true <- data.frame(coef(m, IRTpars = TRUE, simplify = TRUE))  # IRT parameters

params <- cbind(item_pair = rownames(params), params)
params_irt_true <- cbind(item_pair = rownames(params_irt_true), params_irt_true)

Make a data structrue with both IRT params and slope/intercept params

df.params.update <- params %>%
  right_join(params_irt_true)
# checking a1=a
sum(df.params.update$items.a1==df.params.update$items.a)
## [1] 312

Look at how d (slope in model) is different than the b param (IRT param)

qplot(df.params.update$items.d,df.params.update$items.b)

Make data structure with item summary

df.item.summary.params <- df.item.summary %>% 
  right_join(df.params.update, by = "item_pair") 

Plots from IRT models

ggplot(df.item.summary.params ,
       aes(x = pcor,
           y = items.a)) + 
  geom_text(aes(label = as.character(item_pair)), hjust=0.4, vjust=0, size = 2) +
  geom_point(size = 0.5) 

ggplot(df.item.summary.params ,
       aes(x = pcor,
           y = items.b)) + 
  facet_wrap(~numAFC) +
  geom_text(aes(label = as.character(item_pair)), hjust=0.4, vjust=0, size = 2) +
  geom_point(size = 0.5) 

Data structure for merging back with item info

items_in_model <- test_data %>%
  filter(!targetWord %in% ceiling$targetWord) %>%
  group_by(item_pair, numAFC, targetWord) %>% 
  summarise(mean_rt = mean(rt))
thetas <- as.matrix(fscores(m, method = "ML", max_theta = 6, min_theta = -6))
df.theta <- d_wide_all.2 %>% 
  select(sub_id, runId) %>% 
  add_column(theta = as.numeric(thetas))
ggplot(df.theta) +
  geom_histogram(aes(x = theta), color="black", fill="grey", bins = 40) +
  labs(x = "thetaEstimate",
       y = "count of students",
       title = "Distribution of thetas ")

ggplot(df.item.summary.params) +
  geom_histogram(aes(x = items.b), color="black", fill="grey", bins = 40) +
  labs(x = "b.params",
       y = "count of items",
       title = "Distribution of b params ")

ggplot(df.item.summary.params) +
  geom_histogram(aes(x = items.a), color="black", fill="grey", bins = 40) +
  xlim(0, 3) +
  labs(x = "a.params",
       y = "count of items",
       title = "Distribution of a params ")

Calculate through Item Information

df.bank <- test_data %>%
  filter(itemType!='catch') %>%
  filter(!targetWord %in% ceiling$targetWord) %>%
  select(sub_id, runId, item_pair, correct, rt) %>% 
  left_join(df.theta, by = c("sub_id", "runId")) %>% 
  left_join(df.item.summary.params, by = c("item_pair")) %>% 
  filter(theta != Inf)
df.bank.iteminfo.allItems <- df.bank  %>% 
  rowwise() %>%
  mutate(itemInfo = iteminfo(extract.item(m,item_pair), theta)) %>%
  mutate(itemInfoNorm = itemInfo/(rt/1000))
fisherInfo <- function(th, a, b, c){
  p = c + (1-c) * exp(a*(th - b))/(1+ exp(a*(th - b)))
  q = 1- p 
  return  ((a^2) * (q/p) * ((p-c)^2) / ((1-c)^2))
}
df.bank.fisherInfo.allItems <- df.bank  %>% 
  mutate(fisherInfo = fisherInfo(theta, items.a, items.b, items.g)) %>% 
  mutate(fisherInfo_norm = fisherInfo/(rt/1000))
df.bank.bothInfos.allItems <- df.bank.fisherInfo.allItems  %>% 
  right_join(df.bank.iteminfo.allItems) 

What this threshold is seems to matter

df.bank.bothInfos.thetaBin <- df.bank.bothInfos.allItems  %>% 
  filter(abs(theta - items.b) < .5) 
not_enough_trials_per_sub <- df.bank.bothInfos.thetaBin  %>%
  group_by(sub_id) %>%
  summarize(trial_count = length(unique(item_pair)))  %>%
  filter(trial_count<5)
  
# hist(trials_per_sub$trial_count)          
df.bank.bothInfos.thetaBin.filtered <- df.bank.bothInfos.allItems  %>% 
  filter(abs(theta - items.b) < .5)  %>%
  filter(!sub_id %in% not_enough_trials_per_sub$sub_id)

Comparing item info for all items

df.bank.bothInfos.allItems %>% 
  group_by(numAFC) %>% 
  summarise(mean(fisherInfo_norm), sd(fisherInfo_norm), mean(itemInfoNorm), sd(itemInfoNorm),  n = n()) 
## # A tibble: 3 × 6
##   numAFC `mean(fisherInfo_n…` `sd(fisherInfo…` `mean(itemInfo…` `sd(itemInfoNo…`
##   <chr>                 <dbl>            <dbl>            <dbl>            <dbl>
## 1 2                    0.0268           0.0339           0.0268           0.0339
## 2 3                    0.0320           0.0455           0.0320           0.0455
## 3 4                    0.0393           0.0538           0.0393           0.0538
## # … with 1 more variable: n <int>
summary(lm(data = df.bank.bothInfos.allItems, itemInfoNorm ~ numAFC))
## 
## Call:
## lm(formula = itemInfoNorm ~ numAFC, data = df.bank.bothInfos.allItems)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03932 -0.02674 -0.01723  0.01217  0.83346 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0267770  0.0004794  55.855  < 2e-16 ***
## numAFC3     0.0052530  0.0006823   7.699 1.42e-14 ***
## numAFC4     0.0125422  0.0006854  18.299  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.045 on 25841 degrees of freedom
## Multiple R-squared:  0.01288,    Adjusted R-squared:  0.0128 
## F-statistic: 168.5 on 2 and 25841 DF,  p-value: < 2.2e-16

Comparing item info only for trials around estimated theta per participant.

When excluding subs with only a few trials

df.bank.bothInfos.thetaBin.filtered %>% 
  group_by(numAFC) %>% 
  summarise(mean(fisherInfo_norm), sd(fisherInfo_norm), mean(itemInfoNorm), sd(itemInfoNorm),  n = n()) 
## # A tibble: 3 × 6
##   numAFC `mean(fisherInfo_n…` `sd(fisherInfo…` `mean(itemInfo…` `sd(itemInfoNo…`
##   <chr>                 <dbl>            <dbl>            <dbl>            <dbl>
## 1 2                    0.0788           0.0428           0.0788           0.0428
## 2 3                    0.111            0.0743           0.111            0.0743
## 3 4                    0.131            0.0758           0.131            0.0758
## # … with 1 more variable: n <int>

Or keeping all

df.bank.bothInfos.thetaBin %>% 
  group_by(numAFC) %>% 
  summarise(mean(fisherInfo_norm), sd(fisherInfo_norm), mean(itemInfoNorm), sd(itemInfoNorm),  n = n()) 
## # A tibble: 3 × 6
##   numAFC `mean(fisherInfo_n…` `sd(fisherInfo…` `mean(itemInfo…` `sd(itemInfoNo…`
##   <chr>                 <dbl>            <dbl>            <dbl>            <dbl>
## 1 2                    0.0794           0.0430           0.0794           0.0430
## 2 3                    0.110            0.0731           0.110            0.0731
## 3 4                    0.130            0.0740           0.130            0.0740
## # … with 1 more variable: n <int>
summary(lm(data = df.bank.bothInfos.thetaBin, itemInfoNorm ~ numAFC))
## 
## Call:
## lm(formula = itemInfoNorm ~ numAFC, data = df.bank.bothInfos.thetaBin)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.10957 -0.03975 -0.01139  0.02503  0.75511 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.079381   0.002373   33.45   <2e-16 ***
## numAFC3     0.030995   0.003440    9.01   <2e-16 ***
## numAFC4     0.050197   0.003394   14.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06456 on 2117 degrees of freedom
## Multiple R-squared:  0.09543,    Adjusted R-squared:  0.09458 
## F-statistic: 111.7 on 2 and 2117 DF,  p-value: < 2.2e-16

Visualize multiAFC different in terms of item information

df.summary.info = df.bank.bothInfos.allItems %>% 
  group_by(targetWord, item_pair, items.a, items.b, numAFC) %>% 
  summarise(meanItemInfoNorm = mean(itemInfoNorm))
ggplot(df.summary.info, mapping = aes(x = items.a,
                     y = meanItemInfoNorm, color = numAFC)) +
  labs(x = "discrimination param (a)",
       y = "mean item info normalized ") +
  geom_point(size = 0.5) 

ggplot(df.summary.info, mapping = aes(x = items.b,
                     y = meanItemInfoNorm, color = numAFC)) +
  labs(x = "difficulty param (b)",
       y = "mean item info normalized ") +
  geom_point(size = 0.5) + 
  xlim(-6, 6)

ggplot(data = df.summary.info, aes(x=as.factor(numAFC), y=meanItemInfoNorm, col=numAFC)) +
  # geom_jitter(height=.1, width=.3, alpha=.8)  +
  geom_point(alpha=.6) +
  geom_line(aes(group=targetWord), alpha=.6, color='grey') +
  theme_few() +
  ylab('mean item information') +
  xlab('AFC version') +
  ggtitle('RT normalized item information for each item as a function of 234AFC') +
  theme(legend.position = 'none')

ggplot(df.bank.bothInfos.allItems, 
  aes(itemInfoNorm, colour = numAFC)) + 
  geom_density(size=0.5, alpha = 0.2) + 
  labs(title = 'normalized itemInfo distribution (all items)') + 
  xlim(0, 0.02)

ggplot(df.bank.bothInfos.thetaBin, 
  aes(itemInfoNorm, colour = numAFC)) + 
  geom_density(size=0.5, alpha = 0.2) + 
  labs(title = 'normalized itemInfo distribution (theta bin)') + 
  xlim(0, 0.08)

ggplot(df.bank.bothInfos.allItems,
       aes(x = theta,
           y = itemInfoNorm, 
           color = numAFC)) + 
  geom_smooth() + xlim(-5, 5) + 
  labs(title = 'normalized itemInfo across theta (all items)')

ggplot(df.bank.bothInfos.thetaBin.filtered,
       aes(x = theta,
           y =  itemInfoNorm, 
           color = numAFC)) + 
  geom_smooth() + xlim(-5, 5) + 
  labs(title = 'normalized itemInfo across theta (theta bin)')

Validate: diff between fisherInfo vs. itemInfo approach

df.item.compare <- df.bank.fisherInfo.allItems %>% 
  mutate(numAFC = as.numeric(numAFC)) %>%
  left_join(df.bank.iteminfo.allItems %>% select(sub_id, runId, item_pair, itemInfoNorm )) 
df.item.compare_afc <- df.item.compare %>%
  group_by(numAFC, targetWord, item_pair) %>%
  summarize(itemInfoNorm_Avg =  mean(itemInfoNorm), fisherInfoNorm_Avg = mean(fisherInfo_norm))
ggplot(df.item.compare_afc, mapping = aes(x = itemInfoNorm_Avg,
                     y = fisherInfoNorm_Avg, color = as.factor(numAFC))) +
  labs(x = "info calculated by itemInfo",
       y = "info calculated by fisherInfo") +
  geom_point(size = 0.5) +
  theme_few() +
  geom_line(aes(group=targetWord), alpha=.2) +
  geom_smooth(aes(group=numAFC), span=20)