Setup

library(tidyverse)
library(lubridate)
library(ggthemes)
library(langcog)
library(mirt)
library(knitr)
library("tidyverse") # for data wrangling 
library(dplyr)

Load data from multiafc task

pilot_data  = read_csv(here::here('data/data_multiafc/multi-afc-april28.csv')) %>%
  as_tibble()

Load data and impose basic filtering requirements

test_data <- pilot_data %>%
  filter(!block %in% c('practice'), completed == TRUE) %>%
  filter(rt>200) %>%
  filter(rt<5000) %>%
  mutate(item_pair = paste0(targetWord,'_',numAFC)) %>%
  mutate(sub_id = pid, numAFC = as.character(numAFC))

We have 205 participants.

sanity <- test_data %>%
  group_by(pid, numAFC, itemType) %>%
  summarize(num_trials = length(trialId), num_test = sum(task=='test_response'), num_prac = sum(task=='practice_response'), date_took = timeFinished[1], completed_or_not = sum(completed), unique_words = length(unique(targetWord)))

low_performers <- test_data %>%
  filter(itemType=='catch') %>%
  group_by(pid) %>%
  summarize(prop_catch = mean(correct)) %>%
  filter(prop_catch<1)

We filtered out 1 for missing catch trials.

test_data <- test_data %>%
  filter(!pid %in% low_performers$pid) %>% 
  filter(itemType != "catch")

Basic descriptives before IRT models

cor_by_afc <- test_data %>%
  group_by(pid, numAFC, schoolId) %>%
  summarize(prop_correct = mean(correct), mean_rt = mean(rt))

Person Level

Accuracy by AFC at participant level

ggplot(data=cor_by_afc, aes(x=numAFC, y=prop_correct, color=numAFC)) +
  facet_wrap(~schoolId) +
  geom_point(alpha=.6) +
  geom_line(aes(group=pid), alpha=.2)

RT person level

ggplot(data=cor_by_afc, aes(x=numAFC, y = mean_rt, color=numAFC)) +
  facet_wrap(~schoolId) +
  geom_point(alpha=.6) +
  geom_line(aes(group=pid), alpha=.2)

Word Level

Accuracy by AFC at word level

by_word <- test_data %>%
  group_by(targetWord, numAFC, item_pair, schoolId) %>%
  summarize(prop_correct = mean(correct), participants = length(unique(pid)), mean_rt = mean(rt))

ggplot(data=by_word, aes(x=numAFC, y=prop_correct, color=numAFC)) +
  geom_point(alpha=.6) +
  facet_wrap(~schoolId) +
  geom_text(aes(label = as.character(targetWord)), hjust=0.4, vjust=0, size = 2) + 
  geom_line(aes(group=targetWord), alpha=.2)

RT by AFC at word level

ggplot(data=by_word, aes(x=numAFC, y=mean_rt, color=numAFC)) +
  geom_point(alpha=.6) +
  facet_wrap(~schoolId) +
  geom_line(aes(group=targetWord), alpha=.2)

Construct data for IRT model

Filter out items that are at ceiling – all adult participants got correct.

ceiling = test_data %>%
  group_by(item_pair, targetWord, numAFC) %>%
  summarize(prop_correct = mean(correct)) %>%
  filter(prop_correct == 1)

which ones are at ceiling?

ceiling %>%
  group_by(numAFC) %>%
  summarize(num_item = length(item_pair))

## # A tibble: 3 × 2
##   numAFC num_item
##   <chr>     <int>
## 1 2            28
## 2 3            25
## 3 4            23

Words where every trial was correct for all participants across all afcs.

ceiling.2 = test_data %>%
  filter(itemType == "test") %>% 
  group_by(targetWord) %>%
  summarize(prop_correct = mean(correct)) %>%
  filter(prop_correct == 1)
ceiling.2

## # A tibble: 4 × 2
##   targetWord prop_correct
##   <chr>             <dbl>
## 1 carrot                1
## 2 rice                  1
## 3 stretcher             1
## 4 sunflower             1

d_wide_all<- test_data %>%
  filter(!item_pair %in% ceiling$item_pair) %>%
  select(sub_id, runId, item_pair, correct) %>% 
  mutate(correct = as.numeric(correct)) %>% 
  pivot_wider(names_from=item_pair, values_from=correct) 
  

d_mat_all <- as.matrix(d_wide_all %>%
  select(-c(sub_id, runId)))

#colnames(d_mat_all)
list.items <- colnames(d_mat_all)
list.guessing <- NULL
for (i in seq(1 : length(list.items))) {
  x <- list.items[i]
  num <- substr(x, nchar(x), nchar(x))
  if (num == "2") {
    list.guessing <- c(list.guessing, 0.5)
  } else if (num == "3") {
    list.guessing <- c(list.guessing, 0.33)
  } else {
    list.guessing <- c(list.guessing, 0.25)
  }
}

d_wide_all.2<- test_data %>% 
  filter(itemType == "test") %>% 
  filter(!targetWord %in% ceiling.2$targetWord) %>%
  select(sub_id, runId, targetWord, correct) %>% 
  mutate(correct = as.numeric(correct)) %>% 
  pivot_wider(names_from=targetWord, values_from=correct) 
  
d_mat_all.2 <- as.matrix(d_wide_all.2 %>%
  select(-c(sub_id, runId)))

df.item.summary <- test_data %>% 
  filter(itemType == "test") %>% 
  group_by(targetWord, numAFC, item_pair) %>% 
  summarise(pcor = mean(correct), n = n()) %>% 
  filter(pcor != 1)

df.item.summary.2 <- test_data %>% 
  filter(itemType == "test") %>% 
  group_by(targetWord) %>% 
  summarise(pcor = mean(correct), n = n()) %>% 
  filter(pcor != 1)

Construct basic IRT model – 2PL

start.dim <- dim(d_mat_all)[2]-1
  mm = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, .2, 1),
  PRIOR = (1-%d, d, norm, 0, 2)'
  )
  mm = mirt.model(sprintf(mm,start.dim,start.dim,start.dim))

Run basic 2PL model

m <- mirt(d_mat_all,model = mm, itemtype = '2PL', guess = (as.vector(list.guessing)))

## 
Iteration: 1, Log-Lik: -6648.035, Max-Change: 2.93388
Iteration: 2, Log-Lik: -3779.615, Max-Change: 0.93751
Iteration: 3, Log-Lik: -3633.402, Max-Change: 0.60292
Iteration: 4, Log-Lik: -3587.651, Max-Change: 0.46466
Iteration: 5, Log-Lik: -3561.415, Max-Change: 0.35267
Iteration: 6, Log-Lik: -3545.558, Max-Change: 0.25648
Iteration: 7, Log-Lik: -3536.239, Max-Change: 0.19861
Iteration: 8, Log-Lik: -3530.621, Max-Change: 0.15285
Iteration: 9, Log-Lik: -3527.206, Max-Change: 0.11706
Iteration: 10, Log-Lik: -3525.135, Max-Change: 0.08935
Iteration: 11, Log-Lik: -3523.883, Max-Change: 0.06792
Iteration: 12, Log-Lik: -3523.129, Max-Change: 0.05254
Iteration: 13, Log-Lik: -3522.075, Max-Change: 0.01186
Iteration: 14, Log-Lik: -3522.050, Max-Change: 0.00941
Iteration: 15, Log-Lik: -3522.035, Max-Change: 0.00724
Iteration: 16, Log-Lik: -3522.014, Max-Change: 0.00271
Iteration: 17, Log-Lik: -3522.014, Max-Change: 0.00153
Iteration: 18, Log-Lik: -3522.013, Max-Change: 0.00192
Iteration: 19, Log-Lik: -3522.013, Max-Change: 0.00091
Iteration: 20, Log-Lik: -3522.013, Max-Change: 0.00036
Iteration: 21, Log-Lik: -3522.013, Max-Change: 0.00080
Iteration: 22, Log-Lik: -3522.013, Max-Change: 0.00042
Iteration: 23, Log-Lik: -3522.013, Max-Change: 0.00086
Iteration: 24, Log-Lik: -3522.013, Max-Change: 0.00033
Iteration: 25, Log-Lik: -3522.013, Max-Change: 0.00029
Iteration: 26, Log-Lik: -3522.013, Max-Change: 0.00063
Iteration: 27, Log-Lik: -3522.013, Max-Change: 0.00023
Iteration: 28, Log-Lik: -3522.013, Max-Change: 0.00098
Iteration: 29, Log-Lik: -3522.013, Max-Change: 0.00038
Iteration: 30, Log-Lik: -3522.013, Max-Change: 0.00016
Iteration: 31, Log-Lik: -3522.013, Max-Change: 0.00035
Iteration: 32, Log-Lik: -3522.013, Max-Change: 0.00062
Iteration: 33, Log-Lik: -3522.013, Max-Change: 0.00024
Iteration: 34, Log-Lik: -3522.013, Max-Change: 0.00020
Iteration: 35, Log-Lik: -3522.013, Max-Change: 0.00043
Iteration: 36, Log-Lik: -3522.013, Max-Change: 0.00017
Iteration: 37, Log-Lik: -3522.013, Max-Change: 0.00014
Iteration: 38, Log-Lik: -3522.013, Max-Change: 0.00029
Iteration: 39, Log-Lik: -3522.013, Max-Change: 0.00011
Iteration: 40, Log-Lik: -3522.013, Max-Change: 0.00010

df.params <- data.frame(coef(m, IRTpars = TRUE, simplify = TRUE)) 

df.params.update <- cbind(item_pair = rownames(df.params), df.params)

df.item.summary.params <- df.item.summary %>% 
  left_join(df.params.update, by = "item_pair")

ggplot(df.item.summary.params %>% filter(items.b < 10),
       aes(x = pcor,
           y = items.b)) + 
  geom_text(aes(label = as.character(targetWord)), hjust=0.4, vjust=0, size = 2) +
  geom_point(size = 0.5)

ggplot(df.item.summary.params, 
  aes(items.b, colour = numAFC)) + 
  xlim(-5, 5) +
  geom_density(size=0.5, alpha = 0.2) + 
  labs(title = 'b params')

ggplot(df.item.summary.params, 
  aes(items.a, colour = numAFC)) + 
  xlim(-5, 5) +
  geom_density(size=0.5, alpha = 0.2) + 
  labs(title = 'a params')

Data structure for merging back with item info

items_in_model <- test_data %>%
  filter(!item_pair %in% ceiling$item_pair) %>%
  distinct(item_pair, numAFC, targetWord)

Participant level thetas

thetas <- as.matrix(fscores(m, method = "EAP"))

Go through all items and compute summed item info for now with all Thetas (-4 to 4)

num_items_retained = dim(d_wide_all)[2] -2
items_retained = colnames(d_wide_all %>% select(-c(sub_id, runId)))
Theta <- matrix(seq(-4,4, by = .1))

for (i in seq(1,num_items_retained)){
  item_pair <- items_retained[i]
  item_info <- iteminfo(extract.item(m,i), Theta)
  summed_item_info = sum(item_info)

  if (i==1) {
    items = cbind(item_pair, summed_item_info) %>%
      as_tibble()
  }
  else {
    this_item = cbind(item_pair, summed_item_info) %>%
      as_tibble()

    items = items %>%
      rbind(this_item)
  }
}

Join back together with item information

all_item_info <- items_in_model %>%
  left_join(items) %>%
  mutate(summed_item_info = as.double(summed_item_info))

Overall averages and number of items retained by AFC

by_afc <- all_item_info %>%
  group_by(numAFC) %>%
  summarize(mean_item_info = mean(summed_item_info), num_items = length(item_pair))

by_word_all <- test_data %>%
  group_by(targetWord, numAFC, item_pair) %>%
  summarize(prop_correct = mean(correct), participants = length(unique(pid)), mean_rt = mean(rt))

all_item_info_with_rt <- all_item_info %>%
  right_join(by_word_all %>% select(item_pair, mean_rt)) %>%
  mutate(summed_item_info_normalized = summed_item_info / (mean_rt/1000))

Plots from IRT models

ggplot(data = all_item_info, aes(x=as.factor(numAFC), y=summed_item_info, col=numAFC)) +
  # geom_jitter(height=.1, width=.3, alpha=.8)  +
  geom_point(alpha=.6) +
  geom_line(aes(group=targetWord), alpha=.6, color='grey') +
  theme_few() +
  ylab('Summed item information') +
  xlab('AFC version') +
  ggtitle('Item information for each item as a function of 234AFC') +
  theme(legend.position = 'none')

ggplot(data = all_item_info_with_rt, aes(x=as.factor(numAFC), y=summed_item_info_normalized, col=numAFC)) +
  # geom_jitter(height=.1, width=.3, alpha=.8)  +
  geom_point(alpha=.6) +
  geom_line(aes(group=targetWord), alpha=.6, color='grey') +
  theme_few() +
  ylab('Summed item information') +
  xlab('AFC version') +
  ggtitle('RT normalized item information for each item as a function of 234AFC') +
  theme(legend.position = 'none')

Run some linear models to check visuals

Non-normalized by AFC?

summary(lm(data = all_item_info_with_rt, summed_item_info ~ numAFC))

## 
## Call:
## lm(formula = summed_item_info ~ numAFC, data = all_item_info_with_rt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5930 -1.7436 -0.1088  1.4080  6.6012 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.0491     0.2380   8.609 9.09e-16 ***
## numAFC3       0.9316     0.3336   2.793  0.00564 ** 
## numAFC4       1.5481     0.3316   4.668 5.01e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.129 on 245 degrees of freedom
##   (76 observations deleted due to missingness)
## Multiple R-squared:  0.08243,    Adjusted R-squared:  0.07494 
## F-statistic: 11.01 on 2 and 245 DF,  p-value: 2.649e-05

Normalized by AFC?

summary(lm(data = all_item_info_with_rt, summed_item_info_normalized ~ numAFC))

## 
## Call:
## lm(formula = summed_item_info_normalized ~ numAFC, data = all_item_info_with_rt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8461 -0.9712 -0.0637  0.8367  3.4461 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.2598     0.1273   9.897  < 2e-16 ***
## numAFC3       0.3582     0.1784   2.008  0.04577 *  
## numAFC4       0.5888     0.1774   3.320  0.00104 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.139 on 245 degrees of freedom
##   (76 observations deleted due to missingness)
## Multiple R-squared:  0.04352,    Adjusted R-squared:  0.03572 
## F-statistic: 5.574 on 2 and 245 DF,  p-value: 0.004291

all_item_info_with_rt %>% 
  group_by(numAFC) %>% 
  summarise(mean(summed_item_info_normalized, na.rm = TRUE), sd = sd(summed_item_info_normalized, na.rm = TRUE))

## # A tibble: 3 × 3
##   numAFC `mean(summed_item_info_normalized, na.rm = TRUE)`    sd
##   <chr>                                              <dbl> <dbl>
## 1 2                                                   1.26 0.813
## 2 3                                                   1.62 1.18 
## 3 4                                                   1.85 1.34

2 vs 3 normalized?

summary(lm(data = all_item_info_with_rt %>% filter(numAFC<4), summed_item_info_normalized ~ numAFC))

## 
## Call:
## lm(formula = summed_item_info_normalized ~ numAFC, data = all_item_info_with_rt %>% 
##     filter(numAFC < 4))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.61794 -0.91226 -0.06579  0.71898  3.05217 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.2598     0.1136  11.085   <2e-16 ***
## numAFC3       0.3582     0.1593   2.249   0.0259 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.017 on 161 degrees of freedom
##   (53 observations deleted due to missingness)
## Multiple R-squared:  0.03046,    Adjusted R-squared:  0.02443 
## F-statistic: 5.057 on 1 and 161 DF,  p-value: 0.02588

3 vs 4 afc normalized?

summary(lm(data = all_item_info_with_rt %>% filter(numAFC>2), summed_item_info_normalized ~ numAFC))

## 
## Call:
## lm(formula = summed_item_info_normalized ~ numAFC, data = all_item_info_with_rt %>% 
##     filter(numAFC > 2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8461 -1.0806 -0.1904  0.9397  3.4461 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.6180     0.1388  11.657   <2e-16 ***
## numAFC4       0.2306     0.1951   1.182    0.239    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.264 on 166 degrees of freedom
##   (48 observations deleted due to missingness)
## Multiple R-squared:  0.008344,   Adjusted R-squared:  0.00237 
## F-statistic: 1.397 on 1 and 166 DF,  p-value: 0.239

summary(lm(data = all_item_info_with_rt %>% filter(numAFC %in% c(2, 4)), summed_item_info_normalized ~ numAFC))

## 
## Call:
## lm(formula = summed_item_info_normalized ~ numAFC, data = all_item_info_with_rt %>% 
##     filter(numAFC %in% c(2, 4)))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8461 -0.9699 -0.0045  0.8004  3.4461 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.2598     0.1249  10.087  < 2e-16 ***
## numAFC4       0.5888     0.1740   3.383 0.000897 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.117 on 163 degrees of freedom
##   (51 observations deleted due to missingness)
## Multiple R-squared:  0.06562,    Adjusted R-squared:  0.05989 
## F-statistic: 11.45 on 1 and 163 DF,  p-value: 0.0008966

multi-afc-analysis

Bria Long, Anya Ma (edit)

2023-04-26