Setup

Load libraries

library(tidyverse)
library(lubridate)
library(ggthemes)
library(langcog)
library(mirt)
library('ggpubr')
library(knitr)
library("tidyverse") # for data wrangling 
library(dplyr)
library(here)

Read in demographics

bing_demo <- read_csv(here::here('data/validation_data/demographics.csv')) %>%
    as_tibble %>%
    select(SubID, `Age (months)`, Exclude, Date) %>%
    rename(age_in_months = `Age (months)`) %>%
    rename(pid = SubID)

Read in bing data

bing_data_raw <- read_csv(here::here('data/validation_data/bing-trials-aug10.csv')) %>%
    as_tibble %>%
    mutate(schoolId = "bing")

Check for erroneous participants

# Experimenter 
duplicate_ids <- bing_data_raw %>%
  group_by(pid) %>%
  filter(!pid %in% c('test')) %>%
  filter(completed==TRUE) %>%
  summarize(num_runs = length(unique(runId))) %>%
  filter(num_runs > 1)

unique_ids <- bing_data_raw %>%
  group_by(pid) %>%
  summarize(num_runs = length(unique(runId)), completed_status = completed[1])

dates_huh <- bing_data_raw %>%
  filter(pid %in% duplicate_ids$pid) %>%
  distinct(pid, runId, start_time)

unique_ids_incomplete <- bing_data_raw %>%
  group_by(pid) %>%
  filter(completed==FALSE) %>%
  # filter(task == 'test_response') %>%
  summarize(num_runs = length(unique(runId)), num_trials = length(unique(trialId)))

There were 6 participants who didn’t finish the task.

guessing_kids <- bing_data_raw %>%
  filter(task=='test_response') %>%
  filter(numAFC==2) %>%
  group_by(pid) %>%
  summarize(pc = mean(correct)) %>%
  filter(pc < .55)

guess_kids_by_afc <- bing_data_raw %>%
  filter(pid %in% guessing_kids$pid) %>%
  filter(task=='test_response') %>%
  group_by(pid,numAFC) %>%
  summarize(pc = mean(correct))

There were 2 participants were excluded for experimenter error

There were 74 participants initially.

There were 11 participants who scored near chance on the assessment (<55% on 2AFC) and were excluded.

There were 6 participants who did not complete the assessment.

Exclude participants

bing_data <- bing_data_raw %>%
  filter(!pid %in% duplicate_ids$pid) %>%
  filter(completed==TRUE) %>% # had to complete
  filter(!pid %in% c('bing-01-pilot','bing-01-demo','test'))  %>% #test subs
  filter(!pid %in% guessing_kids$pid) %>%
  filter(!block %in% c('practice')) %>%
  left_join(bing_demo) 
  # filter(rt > 300) %>%
  # filter(rt < 20000)

# kids_enough_trials <- bing_data %>%
#   group_by(pid) %>%
#   summarize(num_trials = length(unique(trialId))) %>%
#   summarize(average_rt = mean(rt))

processed_data <- read_csv(here::here('data/validation_data/preprocessed_data/theta_by_child.csv')) 
processed_data_resp <- read_csv(here::here('data/validation_data/preprocessed_data/theta_by_child_resp.csv'))

Grab NIH toolbox data

nih_data <- nih_data %>%
  filter(! pid %in% c('test'))

Make basic descriptives of bing data

#A
sub_summary <- bing_data %>% 
  # filter(rt>200) %>%
  # filter(rt<15000) %>%
  group_by(pid,numAFC, age_in_months) %>% 
  summarize(pc = mean(correct), med_rt = median(rt))

# Across all AFC
sub_summary_overall <- bing_data %>% 
  group_by(pid, age_in_months) %>% 
  summarize(pc = mean(correct), med_rt = median(rt))

ggplot(data = sub_summary, aes(x=numAFC, y=pc, col=age_in_months)) +
  geom_point() +
  geom_line(aes(group=pid)) +
  theme_few() +
  ylim(.25, 1)

Do some sanity on the merging of the data by date

roar_date_check <- bing_data %>%
  distinct(pid, runId, timeFinished)

date_check <- nih_data %>%
  right_join(roar_date_check) %>%
  select(pid, InstrumentEnded, timeFinished) %>%
  mutate(roar_date = date(timeFinished), roar_hour = hour(timeFinished), nih_date = date(InstrumentEnded), nih_hour = hour(InstrumentEnded)) %>%
  mutate(sanity = (roar_date == nih_date)) %>% 
  mutate(sanity_hour = (roar_hour == nih_hour))

Get theta estimate from prior data

Use preprocessed data from other children/adults who have taken this version to estimate thetas for each child.

Construct 3PL

d_mat_all <- processed_data_resp %>% 
  select(-...1)

start.dim <- dim(d_mat_all)[2]
mm.string = (
  'F = 1-%d,
  PRIOR = (1-%d, a1, norm, 1, 0.5),
  PRIOR = (1-%d, d, norm, 0, 2), 
  LBOUND = (1-%g, g, 0.25), 
  UBOUND = (1-%g, g, 1), 
  LBOUND = (1-%d, a1, 0.5)'
  )
mm = mirt.model(sprintf(mm.string, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim, 
                        start.dim,
                        start.dim))

Run basic 3PL model with priors specified above and guessing rate per item

mod.vocab <- mirt(d_mat_all, model = mm, itemtype = '3PL')

## 
Iteration: 1, Log-Lik: -46124.463, Max-Change: 3.91347
Iteration: 2, Log-Lik: -38209.299, Max-Change: 0.51354
Iteration: 3, Log-Lik: -37868.194, Max-Change: 0.34695
Iteration: 4, Log-Lik: -37624.475, Max-Change: 0.31731
Iteration: 5, Log-Lik: -37440.093, Max-Change: 0.30857
Iteration: 6, Log-Lik: -37304.204, Max-Change: 0.25127
Iteration: 7, Log-Lik: -37205.235, Max-Change: 0.28409
Iteration: 8, Log-Lik: -37132.110, Max-Change: 0.31873
Iteration: 9, Log-Lik: -37078.879, Max-Change: 0.14872
Iteration: 10, Log-Lik: -37040.283, Max-Change: 0.12788
Iteration: 11, Log-Lik: -37012.523, Max-Change: 0.15922
Iteration: 12, Log-Lik: -36992.473, Max-Change: 0.12055
Iteration: 13, Log-Lik: -36978.014, Max-Change: 0.09798
Iteration: 14, Log-Lik: -36967.628, Max-Change: 0.10210
Iteration: 15, Log-Lik: -36960.150, Max-Change: 0.15568
Iteration: 16, Log-Lik: -36954.748, Max-Change: 0.06872
Iteration: 17, Log-Lik: -36950.903, Max-Change: 0.07854
Iteration: 18, Log-Lik: -36948.132, Max-Change: 0.04773
Iteration: 19, Log-Lik: -36942.478, Max-Change: 0.03661
Iteration: 20, Log-Lik: -36942.101, Max-Change: 0.01369
Iteration: 21, Log-Lik: -36941.832, Max-Change: 0.01492
Iteration: 22, Log-Lik: -36941.187, Max-Change: 0.00394
Iteration: 23, Log-Lik: -36941.178, Max-Change: 0.01102
Iteration: 24, Log-Lik: -36941.171, Max-Change: 0.00173
Iteration: 25, Log-Lik: -36941.167, Max-Change: 0.00150
Iteration: 26, Log-Lik: -36941.164, Max-Change: 0.00127
Iteration: 27, Log-Lik: -36941.161, Max-Change: 0.00107
Iteration: 28, Log-Lik: -36941.155, Max-Change: 0.00015
Iteration: 29, Log-Lik: -36941.155, Max-Change: 0.00012
Iteration: 30, Log-Lik: -36941.155, Max-Change: 0.00011
Iteration: 31, Log-Lik: -36941.155, Max-Change: 0.00002

# mirt::marginal_rxx(mod.vocab)

Pre-processing structure

Create sandbox participant who has seen all trials

list.items <- colnames(processed_data_resp %>% select(-...1))
df.sandbox.participant <- 
  tibble(pid = "sandbox", itemId = list.items, correct = 1)

Create trial ID structure for matrix

df.validation.roar.trials.clean <- bing_data %>% 
  filter(studyId %in%  c("validation-multiAFC"), itemType == "test")  %>%
   mutate(itemId = paste(targetWord, numAFC, 2, sep = "_")) %>%
   dplyr :: select(pid, runId, studyId, start_time, numAFC, targetWord, itemId, correct, rt)

Make each of the data structures for getting thetas

df.multiAFC.resp <- df.validation.roar.trials.clean %>% 
  select(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_from = correct, values_fill = NA)

df.2AFC.resp <- df.validation.roar.trials.clean %>% 
  filter(numAFC == 2) %>% 
  select(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_from = correct)

df.3AFC.resp <- df.validation.roar.trials.clean %>% 
  filter(numAFC == 3) %>% 
  select(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_from = correct)

df.4AFC.resp <- df.validation.roar.trials.clean %>% 
  filter(numAFC == 4) %>% 
  select(pid, itemId, correct) %>% 
  rbind(df.sandbox.participant) %>%
  unique() %>% 
  pivot_wider(names_from = itemId, values_from = correct)

Get theta score based on these reseponse pattenrs

getTheta <- function(df){
  thetas <- fscores(mod.vocab, method = "ML", response.pattern = df %>% select(-pid), max_theta = 6, min_theta = -6)
  
  return (data.frame(thetas)$F)
}

Overall and based on 234afc separately

df.thetas.students <- df.multiAFC.resp %>% 
    dplyr :: select(pid) %>% 
    add_column(theta.multiAFC = getTheta(df.multiAFC.resp), 
               theta.2AFC = getTheta(df.2AFC.resp),
               theta.3AFC = getTheta(df.3AFC.resp),
               theta.4AFC = getTheta(df.4AFC.resp)
               )

Compute disattenutated correlation

Reliability for theta correlation for the bing sample kids

theta_se <- fscores(mod.vocab, response.pattern = df.multiAFC.resp %>% select(-pid), full.scores.SE = TRUE, method = 'ML')

empirical_rxx(theta_se)

##         F 
## 0.6258709

Merge back data from NIH and VisualVocab

sub_summary_overall_nih <- sub_summary_overall %>%
  left_join(nih_data %>% select(pid,Theta)) %>% 
  left_join(df.thetas.students, by = "pid")

write_csv(sub_summary_overall_nih, file = here::here('data/validation_data/output/all_validation_preprocessed_data.csv'))

ggplot(data = sub_summary_overall_nih, aes(x=Theta, y=theta.multiAFC, col=age_in_months)) +
  geom_point() +
  # theme_few() +
  geom_smooth(method = MASS::rlm) +
  geom_abline(color = 'gray') +
  labs(x = "NIH toolbox ability estimate", y = "Visual Vocab ability estimate: All trials") +
  xlim(-5, 1) + 
  ylim(-5, 1) +
  stat_cor(cor.coef.name = 'r', aes(label = ..r.label..)) + 
  coord_equal()

  # ggsave('comparison_overall.pdf', units = 'in', height= 6, width = 6)

ggplot(data = sub_summary_overall_nih, aes(x=Theta, y=theta.2AFC, col=age_in_months)) +
  geom_point() +
  geom_smooth(method = MASS::rlm) +
  geom_abline(color = 'gray') +
  labs(x = "NIH toolbox ability estimate", y = "Visual Vocab ability estimate: 2afc") +
  xlim(-5, 1) + 
  ylim(-5, 1) +
  stat_cor(cor.coef.name = 'r', aes(label = ..r.label..)) + 
  coord_equal()

# ggsave('comparison_2afc.pdf', units = 'in', height= 6, width = 6)

ggplot(data = sub_summary_overall_nih, aes(x=Theta, y=theta.3AFC, col=age_in_months)) +
  geom_point() +
  # theme_few(base_size=12) +
  geom_smooth(method = MASS::rlm) +
  geom_abline(color = 'gray') +
  labs(x = "NIH toolbox ability estimate", y = "Visual Vocab ability estimate: 3afc") +
  xlim(-5, 1) + 
  ylim(-5, 1) +
  stat_cor(cor.coef.name = 'r', aes(label = ..r.label..)) + 
  coord_equal()

# ggsave('comparison_3afc.pdf', units = 'in', height= 6, width = 6)

ggplot(data = sub_summary_overall_nih, aes(x=Theta, y=theta.4AFC, col=age_in_months)) +
  geom_point() +
  # theme_few(base_size=12) +å
  geom_smooth(method = MASS::rlm) +
  geom_abline(color = 'gray') +
  labs(x = "NIH toolbox ability estimate", y = "Visual Vocab ability estimate: 4afc") +
  xlim(-5, 1) + 
  ylim(-5, 1) +
  stat_cor(cor.coef.name = 'r', aes(label = ..r.label..)) + 
  coord_equal()

# ggsave('comparison_4afc.pdf', units = 'in', height= 6, width = 6)

Reliability for NIH toolbox - .81

denom = sqrt(.62*.81)

cor.test(sub_summary_overall_nih$Theta, sub_summary_overall_nih$theta.multiAFC)

## 
##  Pearson's product-moment correlation
## 
## data:  sub_summary_overall_nih$Theta and sub_summary_overall_nih$theta.multiAFC
## t = 3.1442, df = 52, p-value = 0.002752
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1477285 0.6029168
## sample estimates:
##       cor 
## 0.3996794

Look at correlations between accuracy vs. NIH thetas

sub_summary_nih <- sub_summary %>%
  left_join(nih_data)

ggplot(data = sub_summary_overall_nih, aes(x=Theta, y=pc, col=age_in_months)) +
  geom_point() +
  ylim(.25, 1) +
  geom_smooth(span=20) +
  theme_few()

ggplot(data = sub_summary_overall_nih, aes(x=age_in_months, y=pc, col=age_in_months)) +
  geom_point() +
  ylim(.25, 1) +
  geom_smooth(span=20) +
  theme_few()

ggplot(data = sub_summary_nih, aes(x=Theta, y=pc, col=as.factor(numAFC))) +
  geom_jitter(width=.01, height=.01) +
  # geom_line(aes(group=numAFC)) +
  geom_smooth(span=20, alpha=.2) +
  theme_few() +
  ylim(.25, 1)

Same overall correlation here

cor.test(sub_summary_overall_nih$Theta,sub_summary_overall_nih$pc)

## 
##  Pearson's product-moment correlation
## 
## data:  sub_summary_overall_nih$Theta and sub_summary_overall_nih$pc
## t = 4.7871, df = 52, p-value = 1.444e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3349134 0.7149572
## sample estimates:
##      cor 
## 0.553076

bing_validation_study_first_pass

Bria Long and Anya Ma

2023-05-19